In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data

In [2]:
!pip install yfinance
!pip install pandas_ta

Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218909 sha256=adc29726ecfdfd5e04799731b4be881c700e27736c7a9239380e2732cf36a2b8
  Stored in directory: /root/.cache/pip/wheels/69/00/ac/f7fa862c34b0e2ef320175100c233377b4c558944f12474cf0
Successfully built pandas_ta
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0


In [3]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
from torch.autograd import Variable
import matplotlib.pyplot as plt
import seaborn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset
import tqdm
import yfinance as yf
from torch.utils.data import DataLoader
from torch.distributions import Categorical
import tqdm
from google.colab import runtime
# Finance Data
import pandas as pd
import pandas_ta as ta
from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
from torch.utils.data import DataLoader


seaborn.set_context(context="talk")
%matplotlib inline

In [4]:
class TimeFeature:
    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.
    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
    """
    #print(f"freq_str: {freq_str}")

    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)
    #print(offset)

    for offset_type, feature_classes in features_by_offsets.items():
        #print(f"offset_type: {offset_type}")
        if isinstance(offset, offset_type):
            #print(cls for cls in feature_classes)
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, freq='h'):
    #print(f"Entered time_features!!")
    #print(dates)
    return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])


In [17]:
import pandas as pd
import numpy as np

def datetime_to_one_hot(datetime_values):
    # Convert input to a DatetimeIndex if it's not already
    if isinstance(datetime_values, pd.Series):
        datetime_values = pd.to_datetime(datetime_values)
    elif isinstance(datetime_values, pd.Index):
        datetime_values = pd.DatetimeIndex(datetime_values)
    else:
        raise ValueError("Input should be a Pandas Series or DatetimeIndex.")

    # Extract the day of the week (0=Monday, 6=Sunday)
    day_of_week = datetime_values.dayofweek

    # Extract the day of the month (1-31)
    day_of_month = datetime_values.day

    # Extract the week of the month (1-5)
    week_of_month = (datetime_values.day - 1) // 7 + 1

    # Extract the month of the year (1-12)
    month_of_year = datetime_values.month

    # One-hot encode each component
    one_hot_day_of_week = np.eye(7)[day_of_week]
    one_hot_day_of_month = np.eye(31)[day_of_month - 1]  # day_of_month ranges from 1-31, so subtract 1 for 0-based indexing
    one_hot_week_of_month = np.eye(5)[week_of_month - 1]  # week_of_month ranges from 1-5, so subtract 1 for 0-based indexing
    one_hot_month_of_year = np.eye(12)[month_of_year - 1]  # month_of_year ranges from 1-12, so subtract 1 for 0-based indexing

    # Concatenate all one-hot vectors along the second axis
    one_hot_combined = np.concatenate([one_hot_day_of_week, one_hot_day_of_month, one_hot_week_of_month, one_hot_month_of_year], axis=1)

    return one_hot_combined

In [5]:
import pandas as pd
import numpy as np

def datetime_to_one_hot(datetime_values):
    # Convert input to a DatetimeIndex if it's not already
    if isinstance(datetime_values, pd.Series):
        datetime_values = pd.to_datetime(datetime_values)
    elif isinstance(datetime_values, pd.Index):
        datetime_values = pd.DatetimeIndex(datetime_values)
    else:
        raise ValueError("Input should be a Pandas Series or DatetimeIndex.")

    # Extract the day of the week (0=Monday, 6=Sunday)
    day_of_week = datetime_values.dayofweek

    # Extract the day of the month (1-31)
    day_of_month = datetime_values.day

    # Extract the week of the month (1-5)
    week_of_month = (datetime_values.day - 1) // 7 + 1

    # Extract the month of the year (1-12)
    month_of_year = datetime_values.month

    # One-hot encode each component
    one_hot_day_of_week = np.eye(7)[day_of_week]
    one_hot_day_of_month = np.eye(31)[day_of_month - 1]  # day_of_month ranges from 1-31, so subtract 1 for 0-based indexing
    one_hot_week_of_month = np.eye(5)[week_of_month - 1]  # week_of_month ranges from 1-5, so subtract 1 for 0-based indexing
    one_hot_month_of_year = np.eye(12)[month_of_year - 1]  # month_of_year ranges from 1-12, so subtract 1 for 0-based indexing

    # Concatenate all one-hot vectors along the second axis
    one_hot_combined = np.concatenate([one_hot_day_of_week, one_hot_day_of_month, one_hot_week_of_month, one_hot_month_of_year], axis=1)

    return one_hot_combined

In [6]:
def extract_data(start_year, end_year=2023, ticker="^SPX"):
    data = yf.Ticker(ticker).history(period="max")
    data = data.dropna()

    def add_features(data):
      # Assuming your DataFrame is named 'data'
      data.ta.sma(close="Close", length=50, append=True)
      data.ta.sma(close="Close", length=200, append=True)
      #data.ta.ichimoku(close="Close", append=True)
      #data.ta.macd(close="Close", append=True)
      data.ta.rsi(close="Close", append=True)
      data.ta.bbands(close="Close", append=True)
      data.ta.macd(close="Close", append=True)
      data.ta.ichimoku(close="Close", append=True)
      data.ta.smi(close="Close", append=True)
      data.ta.willr(close="Close", low="Low", high="High", append=True)
      data.ta.stoch(close="Close", low="Low", high="High", append=True)
      data.ta.fisher(low="Low", high="High", append=True)
      data.ta.atr(low="Low", high="High", close="Close", append=True)
      #data.ta.cdl_pattern(name=['eveningstar', '3whitesoldiers', 'morningstar', '3blackcrows', '3linestrike'])
      data.ta.obv(volume="Volume", close="Close", append=True)
      data.ta.zscore(close="Close", append=True)
      data.ta.entropy(close="Close", append=True)
      return data

    data = add_features(data)
    ## Columns to Drop
    drop = ['Volume', 'Dividends', 'Stock Splits']
    data = data.drop(drop, axis=1)
    data = data.dropna()
    start_year = data.index[0].year if start_year is None else start_year
    data = data[data.index.year >= start_year-1] if start_year is not None else data
    data = data[data.index.year <= end_year] if end_year is not None else data
    #print(f"start year: {start_year}")
    #print(data)
    return data

df = extract_data(1990)
#df = df['Close']
df

Unnamed: 0_level_0,Open,High,Low,Close,SMA_50,SMA_200,RSI_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,...,SMIo_5_20_5,WILLR_14,STOCHk_14_3_3,STOCHd_14_3_3,FISHERT_9_1,FISHERTs_9_1,ATRr_14,OBV,ZS_30,ENTP_10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1989-01-03 00:00:00-05:00,277.720001,277.720001,273.809998,275.309998,274.762200,268.111900,48.838065,274.613084,277.267993,279.922903,...,-0.057734,-77.409688,54.631409,58.669202,0.388937,0.797978,2.233492,2.680319e+10,0.229301,3.325722
1989-01-04 00:00:00-05:00,275.309998,279.750000,275.309998,279.429993,274.677599,268.165350,58.288271,274.697225,277.787994,280.878764,...,-0.011009,-15.361706,54.945716,57.522858,0.248036,0.388937,2.391100,2.695289e+10,1.197195,3.325502
1989-01-05 00:00:00-05:00,279.429993,281.510010,279.429993,280.010010,274.632200,268.221200,59.424554,274.949366,278.373999,281.798632,...,0.014795,-19.480489,62.582706,57.386610,0.507648,0.248036,2.368880,2.712693e+10,1.312146,3.326466
1989-01-06 00:00:00-05:00,280.010010,282.059998,280.010010,280.670013,274.598000,268.280000,60.735306,274.775049,278.628003,282.480957,...,0.030888,-16.848292,82.769838,66.766086,0.908969,0.507648,2.346102,2.728826e+10,1.453896,3.327660
1989-01-09 00:00:00-05:00,280.670013,281.890015,280.320007,280.980011,274.590000,268.368150,61.366579,275.167730,279.280005,283.392280,...,0.037330,-13.090746,83.526824,76.293122,1.352505,0.908969,2.290666,2.745144e+10,1.477710,3.329150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-22 00:00:00-05:00,4753.919922,4772.939941,4736.770020,4754.629883,4468.218584,4335.861046,71.069402,4694.534676,4741.732031,4788.929386,...,-0.054860,-10.098875,80.663261,83.908053,2.832599,2.986470,41.184150,1.379242e+12,1.638294,3.350524
2023-12-26 00:00:00-05:00,4758.859863,4784.720215,4758.450195,4774.750000,4477.157988,4340.426846,72.704374,4694.610700,4748.570020,4802.529339,...,-0.035086,-4.185293,90.737754,85.128169,2.967104,2.832599,40.391734,1.381756e+12,1.736890,3.350531
2023-12-27 00:00:00-05:00,4773.450195,4785.390137,4768.899902,4781.580078,4485.316992,4345.055946,73.256889,4692.545294,4751.212012,4809.878729,...,-0.019366,-1.730508,94.661774,88.687597,3.241961,2.967104,38.684484,1.384504e+12,1.720615,3.349759
2023-12-28 00:00:00-05:00,4786.439941,4793.299805,4780.979980,4783.350098,4493.519990,4349.376246,73.407118,4738.605198,4768.212012,4797.818826,...,-0.009495,-4.538277,96.515307,93.971612,3.584504,3.241961,36.801294,1.387203e+12,1.610700,3.346839


In [7]:
import os
import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

class StockDataset(Dataset):
    def __init__(self, tickers, flag='train', size=None, one_hot_datetime=False,
                 features='S', target='Close', scale=True, timeenc=0, freq='d', batch_size=5,
                 data_start_year=1990, data_end_year=2023):
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]
        self.data_start_year = data_start_year
        self.data_end_year = data_end_year

        self.features = features
        self.target = target
        self.scale = scale
        self.one_hot_datetime = one_hot_datetime
        self.timeenc = timeenc
        self.freq = freq
        self.scaler = StandardScaler()
        self.tickers = tickers.split()
        self.ticker_database = {}
        self.batch_size = batch_size
        self.min_year = 0
        self.__read_data__()

    def __len__(self):
        return len(self.years) - 1

    def get_min_year(self):
        min_year = 0
        for ticker in self.tickers:
            self.ticker_database[ticker] = extract_data(start_year=None, ticker=ticker)
            min_year = max(min_year, self.ticker_database[ticker].index.year.min())
        return min_year + 2

    def __read_data__(self):
        print(f"Loading following tickers: {self.tickers}\n")

        self.min_year = max(self.get_min_year(), self.data_start_year)
        print(f"Dataset Start Year: {self.min_year} | End Year: {self.data_end_year}")
        for ticker in self.tickers:
            self.ticker_database[ticker] = extract_data(start_year=self.min_year, end_year=self.data_end_year, ticker=ticker)
            self.ticker_database[ticker]['year'] = self.ticker_database[ticker].index.year

        self.years = self.ticker_database[self.tickers[0]]['year'].unique()
        print(f"years: {self.years}")
        self.data_by_year = {year: {ticker: self.ticker_database[ticker][self.ticker_database[ticker]['year'] == year] for ticker in self.tickers} for year in self.years}

        self.data_len = len(self.years)
        print(f"DateTime is one-hot: {self.one_hot_datetime}")

    def __getitem__(self, idx):
        """
        Final item form is a list containing [seq_x, seq_y, seq_x_mark, seq_y_mark, seq_x_dates, seq_y_dates]
        batches_x = batches[0]
        batches_y = batches[1]
        batches_x_mark = batches[2]
        batches_y_mark = batches[3]
        batches_x_dates = batches[4]
        batches_y_dates = batches[5]

        Each batches, contain batch data of size (batch_size, seq_len, num_features)
        For example, if seq_len = 24, batch_size = 5, num_features = 32,
        Each item of batches_x is a tensor of (5, 24, 32)
        """
        year = self.min_year + idx if idx >= 0 else self.max_year + idx + 1
        #print(f"Stock Dataset Yeaer: {year}")
        raw_datas = []

        for ticker in self.tickers:
            if year - 1 in self.data_by_year:
                prev_year_data = self.data_by_year[year - 1][ticker].tail(self.seq_len + self.pred_len - 1)
                #print(f"Prev year data for {ticker} in {year}: {prev_year_data.index}")
            else:
                print(f"Previous Year Data is Insufficient, Year: {year-1}, Ticker: {ticker}")
                prev_year_data = pd.DataFrame()

            ticker_data = pd.concat([prev_year_data, self.data_by_year[year][ticker]])
            #print(f"Ticker data for {ticker} in {year}: {ticker_data.index}")
            ticker_data['date'] = ticker_data.index
            raw_datas.append(ticker_data)
            """print(f"Ticker: {ticker}")
            print(len(ticker_data))
            print(ticker_data)"""

        seq_x = []
        seq_y = []
        seq_x_mark = []
        seq_y_mark = []
        seq_x_dates = []
        seq_y_dates = []

        for item in raw_datas:
            x, y, x_mark, y_mark, x_dates, y_dates = self.make_data(item)
            seq_x.append(x)
            seq_y.append(y)
            seq_x_mark.append(x_mark)
            seq_y_mark.append(y_mark)
            seq_x_dates.append(x_dates)
            seq_y_dates.append(y_dates)

        # Combine all tickers into a single batch and slice them into mini-batches
        #print(len(seq_x))
        return self.create_batches(seq_x, seq_y, seq_x_mark, seq_y_mark, seq_x_dates, seq_y_dates), year

    def make_data(self, raw_data):
        cols = list(raw_data.columns)
        cols.remove(self.target)
        cols.remove('date')
        cols.remove('year')
        raw_data = raw_data[['date'] + cols + [self.target]]

        if self.features == 'MS':
            cols_data = raw_data.columns[1:]
            data = raw_data[cols_data]
        elif self.features == 'S':
            data = raw_data['Close']

        if self.scale:
            data = self.scaler.fit_transform(data.values)

        if self.one_hot_datetime:
          #print(type(pd.to_datetime(raw_data['date'].values)))
          data_stamp = datetime_to_one_hot(pd.to_datetime(raw_data['date'].values))
          #print(one_hot_data_stamp.shape)
        else:
          data_stamp = time_features(pd.to_datetime(raw_data['date'].values), freq=self.freq)
          data_stamp = data_stamp.transpose(1, 0)

        """print("----Yearly data----")
        print(data[0].shape)
        print(len(data))
        print(data)"""

        seq_x = []
        seq_y = []
        seq_x_mark = []
        seq_y_mark = []
        x_dates = []
        y_dates = []

        for i in range(len(data) - self.pred_len - self.seq_len + 1):
            s_begin = i
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            seq_x.append(data[s_begin:s_end])
            seq_y.append(data[r_begin:r_end])
            seq_x_mark.append(data_stamp[s_begin:s_end])
            seq_y_mark.append(data_stamp[r_begin:r_end])
            x_dates.append(raw_data['date'][s_begin:s_end])
            y_dates.append(raw_data['date'][r_begin:r_end])
            """print(f"data[s_begin:s_end]: {data[s_begin:s_end]}")
            print(f"data[r_begin:r_end]: {data[r_begin:r_end]}")
            print(f"raw_data['date'][s_begin:s_end]: {raw_data['date'][s_begin:s_end]}")
            print(f"raw_data['date'][r_begin:r_end]: {raw_data['date'][r_begin:r_end]}")
            print("=======================================")"""

        return torch.tensor(np.array(seq_x)), torch.tensor(np.array(seq_y)), torch.tensor(np.array(seq_x_mark)), torch.tensor(np.array(seq_y_mark)), x_dates, y_dates

    def create_batches(self, seq_x, seq_y, seq_x_mark, seq_y_mark, seq_x_dates, seq_y_dates, dates=None):
        batches_x = []
        batches_y = []
        batches_x_mark = []
        batches_y_mark = []
        batches_x_dates = []
        batches_y_dates = []
        batch_size = self.batch_size
        #print(dates[-1])
        for x, y, x_mark, y_mark, x_dates, y_dates in zip(seq_x, seq_y, seq_x_mark, seq_y_mark, seq_x_dates, seq_y_dates):
            #print(x.shape)
            for i in range(0, x.shape[0], batch_size):
                batches_x.append(x[i:i + batch_size])
                batches_y.append(y[i:i + batch_size])
                #print(f"x[i:i + batch_size]: {x[i:i + batch_size].shape}")
                #print(f"y[i:i + batch_size]: {y[i:i + batch_size].shape}")
                batches_x_mark.append(x_mark[i:i + batch_size])
                batches_y_mark.append(y_mark[i:i + batch_size])
                batches_x_dates.append(x_dates[i:i + batch_size])
                batches_y_dates.append(y_dates[i:i + batch_size])
                """print(f"x[i:i + batch_size]: {x[i:i + batch_size]}")
                print(f"y[i:i + batch_size]: {y[i:i + batch_size]}")
                print(f"x_dates[i:i + batch_size]: {x_dates[i:i + batch_size]}")
                print(f"y_dates[i:i + batch_size]: {y_dates[i:i + batch_size]}")
                print(f"------------------------------------------------------")"""

        return batches_x, batches_y, batches_x_mark, batches_y_mark, batches_x_dates, batches_y_dates

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

In [8]:
dataset = StockDataset(tickers='^SPX',timeenc=1, freq='d', size=[36, 18, 1], features='MS')

Loading following tickers: ['^SPX']

Dataset Start Year: 1990 | End Year: 2023
years: [1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
 2017 2018 2019 2020 2021 2022 2023]
DateTime is one-hot: False


In [9]:
batches, year = dataset[0]

# Model

## PatchTST

In [10]:
import torch
import torch.nn as nn

class RevIN(nn.Module):
    def __init__(self, num_features: int, eps=1e-5, affine=True, subtract_last=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super(RevIN, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        if self.affine:
            self._init_params()

    def forward(self, x, mode:str):
        if mode == 'norm':
            self._get_statistics(x)
            x = self._normalize(x)
        elif mode == 'denorm':
            x = self._denormalize(x)
        else: raise NotImplementedError
        return x

    def _init_params(self):
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        dim2reduce = tuple(range(1, x.ndim-1))
        if self.subtract_last:
            self.last = x[:,-1,:].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
        self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()

    def _normalize(self, x):
        if self.subtract_last:
            x = x - self.last
        else:
            x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.affine:
            x = x - self.affine_bias
            x = x / (self.affine_weight + self.eps*self.eps)
        x = x * self.stdev
        if self.subtract_last:
            x = x + self.last
        else:
            x = x + self.mean
        return x

In [11]:
import torch
from torch import nn
import math

class Transpose(nn.Module):
    def __init__(self, *dims, contiguous=False):
        super().__init__()
        self.dims, self.contiguous = dims, contiguous
    def forward(self, x):
        if self.contiguous: return x.transpose(*self.dims).contiguous()
        else: return x.transpose(*self.dims)


def get_activation_fn(activation):
    if callable(activation): return activation()
    elif activation.lower() == "relu": return nn.ReLU()
    elif activation.lower() == "gelu": return nn.GELU()
    raise ValueError(f'{activation} is not available. You can use "relu", "gelu", or a callable')


# decomposition

class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # padding on the both ends of time series
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block
    """
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean



# pos_encoding

def PositionalEncoding(q_len, d_model, normalize=True):
    pe = torch.zeros(q_len, d_model)
    position = torch.arange(0, q_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    if normalize:
        pe = pe - pe.mean()
        pe = pe / (pe.std() * 10)
    return pe

SinCosPosEncoding = PositionalEncoding

def Coord2dPosEncoding(q_len, d_model, exponential=False, normalize=True, eps=1e-3, verbose=False):
    x = .5 if exponential else 1
    i = 0
    for i in range(100):
        cpe = 2 * (torch.linspace(0, 1, q_len).reshape(-1, 1) ** x) * (torch.linspace(0, 1, d_model).reshape(1, -1) ** x) - 1
        pv(f'{i:4.0f}  {x:5.3f}  {cpe.mean():+6.3f}', verbose)
        if abs(cpe.mean()) <= eps: break
        elif cpe.mean() > eps: x += .001
        else: x -= .001
        i += 1
    if normalize:
        cpe = cpe - cpe.mean()
        cpe = cpe / (cpe.std() * 10)
    return cpe

def Coord1dPosEncoding(q_len, exponential=False, normalize=True):
    cpe = (2 * (torch.linspace(0, 1, q_len).reshape(-1, 1)**(.5 if exponential else 1)) - 1)
    if normalize:
        cpe = cpe - cpe.mean()
        cpe = cpe / (cpe.std() * 10)
    return cpe

def positional_encoding(pe, learn_pe, q_len, d_model):
    # Positional encoding
    if pe == None:
        W_pos = torch.empty((q_len, d_model)) # pe = None and learn_pe = False can be used to measure impact of pe
        nn.init.uniform_(W_pos, -0.02, 0.02)
        learn_pe = False
    elif pe == 'zero':
        W_pos = torch.empty((q_len, 1))
        nn.init.uniform_(W_pos, -0.02, 0.02)
    elif pe == 'zeros':
        W_pos = torch.empty((q_len, d_model))
        nn.init.uniform_(W_pos, -0.02, 0.02)
    elif pe == 'normal' or pe == 'gauss':
        W_pos = torch.zeros((q_len, 1))
        torch.nn.init.normal_(W_pos, mean=0.0, std=0.1)
    elif pe == 'uniform':
        W_pos = torch.zeros((q_len, 1))
        nn.init.uniform_(W_pos, a=0.0, b=0.1)
    elif pe == 'lin1d': W_pos = Coord1dPosEncoding(q_len, exponential=False, normalize=True)
    elif pe == 'exp1d': W_pos = Coord1dPosEncoding(q_len, exponential=True, normalize=True)
    elif pe == 'lin2d': W_pos = Coord2dPosEncoding(q_len, d_model, exponential=False, normalize=True)
    elif pe == 'exp2d': W_pos = Coord2dPosEncoding(q_len, d_model, exponential=True, normalize=True)
    elif pe == 'sincos': W_pos = PositionalEncoding(q_len, d_model, normalize=True)
    else: raise ValueError(f"{pe} is not a valid pe (positional encoder. Available types: 'gauss'=='normal', \
        'zeros', 'zero', uniform', 'lin1d', 'exp1d', 'lin2d', 'exp2d', 'sincos', None.)")
    return nn.Parameter(W_pos, requires_grad=learn_pe)

In [81]:
from typing import Callable, Optional
import torch
from torch import nn
from torch import Tensor
import torch.nn.functional as F
import numpy as np


# Cell
class PatchTST_backbone(nn.Module):
    def __init__(self, c_in:int, context_window:int, target_window:int, patch_len:int, stride:int, max_seq_len:Optional[int]=1024,
                 n_layers:int=3, d_model=128, n_heads=16, d_k:Optional[int]=None, d_v:Optional[int]=None,
                 d_ff:int=256, norm:str='BatchNorm', attn_dropout:float=0., dropout:float=0., act:str="gelu", key_padding_mask:bool='auto',
                 padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True, pre_norm:bool=False, store_attn:bool=False,
                 pe:str='zeros', learn_pe:bool=True, fc_dropout:float=0., head_dropout = 0, padding_patch = None,
                 pretrain_head:bool=False, head_type = 'flatten', individual = False, revin = True, affine = True, subtract_last = False,
                 verbose:bool=False, **kwargs):

        super().__init__()

        # RevIn
        self.revin = revin
        if self.revin: self.revin_layer = RevIN(c_in, affine=affine, subtract_last=subtract_last)

        # Patching
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch = padding_patch
        patch_num = int((context_window - patch_len)/stride + 1)
        if padding_patch == 'end': # can be modified to general case
            self.padding_patch_layer = nn.ReplicationPad1d((0, stride))
            patch_num += 1

        # Backbone
        self.backbone = TSTiEncoder(c_in, patch_num=patch_num, patch_len=patch_len, max_seq_len=max_seq_len,
                                n_layers=n_layers, d_model=d_model, n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff,
                                attn_dropout=attn_dropout, dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var,
                                attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn,
                                pe=pe, learn_pe=learn_pe, verbose=verbose, **kwargs)

        # Head
        self.head_nf = d_model * patch_num
        self.n_vars = c_in
        self.pretrain_head = pretrain_head
        self.head_type = head_type
        self.individual = individual



        if self.pretrain_head:
            self.head = self.create_pretrain_head(self.head_nf, c_in, fc_dropout) # custom head passed as a partial func with all its kwargs
        elif head_type == 'flatten':
            self.head = Flatten_Head(self.individual, self.n_vars, self.head_nf, target_window, head_dropout=head_dropout)


    def forward(self, z):                                                                   # z: [bs x nvars x seq_len]
        # norm
        if self.revin:
            z = z.permute(0,2,1)
            z = self.revin_layer(z, 'norm')
            z = z.permute(0,2,1)


        # do patching
        if self.padding_patch == 'end':
            z = self.padding_patch_layer(z)
        z = z.unfold(dimension=-1, size=self.patch_len, step=self.stride)                   # z: [bs x nvars x patch_num x patch_len]
        z = z.permute(0,1,3,2)                                                              # z: [bs x nvars x patch_len x patch_num]

        # model
        z = self.backbone(z)                                                                # z: [bs x nvars x d_model x patch_num]
        z = self.head(z)                                                                    # z: [bs x nvars x target_window]

        # denorm
        if self.revin:
            z = z.permute(0,2,1)
            z = self.revin_layer(z, 'denorm')
            z = z.permute(0,2,1)

        #print(f"z output shape: {z.shape}")
        return z

    def create_pretrain_head(self, head_nf, vars, dropout):
        return nn.Sequential(nn.Dropout(dropout),
                    nn.Conv1d(head_nf, vars, 1)
                    )


class Flatten_Head(nn.Module):
    def __init__(self, individual, n_vars, nf, target_window, head_dropout=0):
        super().__init__()

        self.individual = individual
        self.n_vars = n_vars

        if self.individual:
            self.linears = nn.ModuleList()
            self.dropouts = nn.ModuleList()
            self.flattens = nn.ModuleList()
            for i in range(self.n_vars):
                self.flattens.append(nn.Flatten(start_dim=-2))
                self.linears.append(nn.Linear(nf, target_window))
                self.dropouts.append(nn.Dropout(head_dropout))
        else:
            self.flatten = nn.Flatten(start_dim=-2)
            self.linear = nn.Linear(nf, target_window)
            self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):                                 # x: [bs x nvars x d_model x patch_num]
        if self.individual:
            x_out = []
            for i in range(self.n_vars):
                z = self.flattens[i](x[:,i,:,:])          # z: [bs x d_model * patch_num]
                z = self.linears[i](z)                    # z: [bs x target_window]
                z = self.dropouts[i](z)
                x_out.append(z)
            x = torch.stack(x_out, dim=1)                 # x: [bs x nvars x target_window]
        else:
            x = self.flatten(x)
            x = self.linear(x)
            x = self.dropout(x)
        return x

class TSTiEncoder(nn.Module):  #i means channel-independent
    def __init__(self, c_in, patch_num, patch_len, max_seq_len=1024,
                 n_layers=3, d_model=128, n_heads=16, d_k=None, d_v=None,
                 d_ff=256, norm='BatchNorm', attn_dropout=0., dropout=0., act="gelu", store_attn=False,
                 key_padding_mask='auto', padding_var=None, attn_mask=None, res_attention=True, pre_norm=False,
                 pe='zeros', learn_pe=True, verbose=False, **kwargs):


        super().__init__()

        self.patch_num = patch_num
        self.patch_len = patch_len

        # Input encoding
        q_len = patch_num
        self.W_P = nn.Linear(patch_len, d_model)        # Eq 1: projection of feature vectors onto a d-dim vector space
        self.seq_len = q_len

        # Positional encoding
        self.W_pos = positional_encoding(pe, learn_pe, q_len, d_model)

        # Residual dropout
        self.dropout = nn.Dropout(dropout)

        # Encoder
        self.encoder = TSTEncoder(q_len, d_model, n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, dropout=dropout,
                                   pre_norm=pre_norm, activation=act, res_attention=res_attention, n_layers=n_layers, store_attn=store_attn)


    def forward(self, x) -> Tensor:                                              # x: [bs x nvars x patch_len x patch_num]

        n_vars = x.shape[1]
        # Input encoding
        x = x.permute(0,1,3,2)                                                   # x: [bs x nvars x patch_num x patch_len]
        x = self.W_P(x)                                                          # x: [bs x nvars x patch_num x d_model]

        u = torch.reshape(x, (x.shape[0]*x.shape[1],x.shape[2],x.shape[3]))      # u: [bs * nvars x patch_num x d_model]
        u = self.dropout(u + self.W_pos)                                         # u: [bs * nvars x patch_num x d_model]

        # Encoder
        z = self.encoder(u)                                                      # z: [bs * nvars x patch_num x d_model]
        z = torch.reshape(z, (-1,n_vars,z.shape[-2],z.shape[-1]))                # z: [bs x nvars x patch_num x d_model]
        z = z.permute(0,1,3,2)                                                   # z: [bs x nvars x d_model x patch_num]

        return z



# Cell
class TSTEncoder(nn.Module):
    def __init__(self, q_len, d_model, n_heads, d_k=None, d_v=None, d_ff=None,
                        norm='BatchNorm', attn_dropout=0., dropout=0., activation='gelu',
                        res_attention=False, n_layers=1, pre_norm=False, store_attn=False):
        super().__init__()

        self.layers = nn.ModuleList([TSTEncoderLayer(q_len, d_model, n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm,
                                                      attn_dropout=attn_dropout, dropout=dropout,
                                                      activation=activation, res_attention=res_attention,
                                                      pre_norm=pre_norm, store_attn=store_attn) for i in range(n_layers)])
        self.res_attention = res_attention

    def forward(self, src:Tensor, key_padding_mask:Optional[Tensor]=None, attn_mask:Optional[Tensor]=None):
        output = src
        scores = None
        if self.res_attention:
            for mod in self.layers: output, scores = mod(output, prev=scores, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
            return output
        else:
            for mod in self.layers: output = mod(output, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
            return output



class TSTEncoderLayer(nn.Module):
    def __init__(self, q_len, d_model, n_heads, d_k=None, d_v=None, d_ff=256, store_attn=False,
                 norm='BatchNorm', attn_dropout=0, dropout=0., bias=True, activation="gelu", res_attention=False, pre_norm=False):
        super().__init__()
        assert not d_model%n_heads, f"d_model ({d_model}) must be divisible by n_heads ({n_heads})"
        d_k = d_model // n_heads if d_k is None else d_k
        d_v = d_model // n_heads if d_v is None else d_v

        # Multi-Head attention
        self.res_attention = res_attention
        self.self_attn = _MultiheadAttention(d_model, n_heads, d_k, d_v, attn_dropout=attn_dropout, proj_dropout=dropout, res_attention=res_attention)

        # Add & Norm
        self.dropout_attn = nn.Dropout(dropout)
        if "batch" in norm.lower():
            self.norm_attn = nn.Sequential(Transpose(1,2), nn.BatchNorm1d(d_model), Transpose(1,2))
        else:
            self.norm_attn = nn.LayerNorm(d_model)

        # Position-wise Feed-Forward
        self.ff = nn.Sequential(nn.Linear(d_model, d_ff, bias=bias),
                                get_activation_fn(activation),
                                nn.Dropout(dropout),
                                nn.Linear(d_ff, d_model, bias=bias))

        # Add & Norm
        self.dropout_ffn = nn.Dropout(dropout)
        if "batch" in norm.lower():
            self.norm_ffn = nn.Sequential(Transpose(1,2), nn.BatchNorm1d(d_model), Transpose(1,2))
        else:
            self.norm_ffn = nn.LayerNorm(d_model)

        self.pre_norm = pre_norm
        self.store_attn = store_attn


    def forward(self, src:Tensor, prev:Optional[Tensor]=None, key_padding_mask:Optional[Tensor]=None, attn_mask:Optional[Tensor]=None) -> Tensor:

        # Multi-Head attention sublayer
        if self.pre_norm:
            src = self.norm_attn(src)
        ## Multi-Head attention
        if self.res_attention:
            src2, attn, scores = self.self_attn(src, src, src, prev, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        else:
            src2, attn = self.self_attn(src, src, src, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        if self.store_attn:
            self.attn = attn
        ## Add & Norm
        src = src + self.dropout_attn(src2) # Add: residual connection with residual dropout
        if not self.pre_norm:
            src = self.norm_attn(src)

        # Feed-forward sublayer
        if self.pre_norm:
            src = self.norm_ffn(src)
        ## Position-wise Feed-Forward
        src2 = self.ff(src)
        ## Add & Norm
        src = src + self.dropout_ffn(src2) # Add: residual connection with residual dropout
        if not self.pre_norm:
            src = self.norm_ffn(src)

        if self.res_attention:
            return src, scores
        else:
            return src

class _MultiheadAttention(nn.Module):
    def __init__(self, d_model, n_heads, d_k=None, d_v=None, res_attention=False, attn_dropout=0., proj_dropout=0., qkv_bias=True, lsa=False):
        """Multi Head Attention Layer
        Input shape:
            Q:       [batch_size (bs) x max_q_len x d_model]
            K, V:    [batch_size (bs) x q_len x d_model]
            mask:    [q_len x q_len]
        """
        super().__init__()
        d_k = d_model // n_heads if d_k is None else d_k
        d_v = d_model // n_heads if d_v is None else d_v

        self.n_heads, self.d_k, self.d_v = n_heads, d_k, d_v

        self.W_Q = nn.Linear(d_model, d_k * n_heads, bias=qkv_bias)
        self.W_K = nn.Linear(d_model, d_k * n_heads, bias=qkv_bias)
        self.W_V = nn.Linear(d_model, d_v * n_heads, bias=qkv_bias)

        # Scaled Dot-Product Attention (multiple heads)
        self.res_attention = res_attention
        self.sdp_attn = _ScaledDotProductAttention(d_model, n_heads, attn_dropout=attn_dropout, res_attention=self.res_attention, lsa=lsa)

        # Poject output
        self.to_out = nn.Sequential(nn.Linear(n_heads * d_v, d_model), nn.Dropout(proj_dropout))


    def forward(self, Q:Tensor, K:Optional[Tensor]=None, V:Optional[Tensor]=None, prev:Optional[Tensor]=None,
                key_padding_mask:Optional[Tensor]=None, attn_mask:Optional[Tensor]=None):

        bs = Q.size(0)
        if K is None: K = Q
        if V is None: V = Q

        # Linear (+ split in multiple heads)
        q_s = self.W_Q(Q).view(bs, -1, self.n_heads, self.d_k).transpose(1,2)       # q_s    : [bs x n_heads x max_q_len x d_k]
        k_s = self.W_K(K).view(bs, -1, self.n_heads, self.d_k).permute(0,2,3,1)     # k_s    : [bs x n_heads x d_k x q_len] - transpose(1,2) + transpose(2,3)
        v_s = self.W_V(V).view(bs, -1, self.n_heads, self.d_v).transpose(1,2)       # v_s    : [bs x n_heads x q_len x d_v]

        # Apply Scaled Dot-Product Attention (multiple heads)
        if self.res_attention:
            output, attn_weights, attn_scores = self.sdp_attn(q_s, k_s, v_s, prev=prev, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        else:
            output, attn_weights = self.sdp_attn(q_s, k_s, v_s, key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        # output: [bs x n_heads x q_len x d_v], attn: [bs x n_heads x q_len x q_len], scores: [bs x n_heads x max_q_len x q_len]

        # back to the original inputs dimensions
        output = output.transpose(1, 2).contiguous().view(bs, -1, self.n_heads * self.d_v) # output: [bs x q_len x n_heads * d_v]
        output = self.to_out(output)

        if self.res_attention: return output, attn_weights, attn_scores
        else: return output, attn_weights


class _ScaledDotProductAttention(nn.Module):
    r"""Scaled Dot-Product Attention module (Attention is all you need by Vaswani et al., 2017) with optional residual attention from previous layer
    (Realformer: Transformer likes residual attention by He et al, 2020) and locality self sttention (Vision Transformer for Small-Size Datasets
    by Lee et al, 2021)"""

    def __init__(self, d_model, n_heads, attn_dropout=0., res_attention=False, lsa=False):
        super().__init__()
        self.attn_dropout = nn.Dropout(attn_dropout)
        self.res_attention = res_attention
        head_dim = d_model // n_heads
        self.scale = nn.Parameter(torch.tensor(head_dim ** -0.5), requires_grad=lsa)
        self.lsa = lsa

    def forward(self, q:Tensor, k:Tensor, v:Tensor, prev:Optional[Tensor]=None, key_padding_mask:Optional[Tensor]=None, attn_mask:Optional[Tensor]=None):
        '''
        Input shape:
            q               : [bs x n_heads x max_q_len x d_k]
            k               : [bs x n_heads x d_k x seq_len]
            v               : [bs x n_heads x seq_len x d_v]
            prev            : [bs x n_heads x q_len x seq_len]
            key_padding_mask: [bs x seq_len]
            attn_mask       : [1 x seq_len x seq_len]
        Output shape:
            output:  [bs x n_heads x q_len x d_v]
            attn   : [bs x n_heads x q_len x seq_len]
            scores : [bs x n_heads x q_len x seq_len]
        '''

        # Scaled MatMul (q, k) - similarity scores for all pairs of positions in an input sequence
        attn_scores = torch.matmul(q, k) * self.scale      # attn_scores : [bs x n_heads x max_q_len x q_len]

        # Add pre-softmax attention scores from the previous layer (optional)
        if prev is not None: attn_scores = attn_scores + prev

        # Attention mask (optional)
        if attn_mask is not None:                                     # attn_mask with shape [q_len x seq_len] - only used when q_len == seq_len
            if attn_mask.dtype == torch.bool:
                attn_scores.masked_fill_(attn_mask, -np.inf)
            else:
                attn_scores += attn_mask

        # Key padding mask (optional)
        if key_padding_mask is not None:                              # mask with shape [bs x q_len] (only when max_w_len == q_len)
            attn_scores.masked_fill_(key_padding_mask.unsqueeze(1).unsqueeze(2), -np.inf)

        # normalize the attention weights
        attn_weights = F.softmax(attn_scores, dim=-1)                 # attn_weights   : [bs x n_heads x max_q_len x q_len]
        attn_weights = self.attn_dropout(attn_weights)

        # compute the new values given the attention weights
        output = torch.matmul(attn_weights, v)                        # output: [bs x n_heads x max_q_len x d_v]

        if self.res_attention: return output, attn_weights, attn_scores
        else: return output, attn_weights

In [88]:
from typing import Callable, Optional
import torch
from torch import nn
from torch import Tensor
import torch.nn.functional as F
import numpy as np


class Model(nn.Module):
    def __init__(self, configs, max_seq_len:Optional[int]=1024, d_k:Optional[int]=None, d_v:Optional[int]=None, norm:str='BatchNorm', attn_dropout:float=0.,
                 act:str="gelu", key_padding_mask:bool='auto',padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True,
                 pre_norm:bool=False, store_attn:bool=False, pe:str='zeros', learn_pe:bool=True, pretrain_head:bool=False, head_type = 'flatten', verbose:bool=False, **kwargs):

        super().__init__()

        # load parameters
        c_in = configs.enc_in
        context_window = configs.seq_len
        target_window = configs.pred_len

        n_layers = configs.e_layers
        n_heads = configs.n_heads
        d_model = configs.d_model
        d_ff = configs.d_ff
        dropout = configs.dropout
        fc_dropout = configs.fc_dropout
        head_dropout = configs.head_dropout

        individual = configs.individual

        patch_len = configs.patch_len
        stride = configs.stride
        padding_patch = configs.padding_patch

        revin = configs.revin
        affine = configs.affine
        subtract_last = configs.subtract_last

        decomposition = configs.decomposition
        kernel_size = configs.kernel_size


        # model
        self.decomposition = decomposition
        if self.decomposition:
            self.decomp_module = series_decomp(kernel_size)
            self.model_trend = PatchTST_backbone(c_in=c_in, context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride,
                                  max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model,
                                  n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout,
                                  dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var,
                                  attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn,
                                  pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch,
                                  pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine,
                                  subtract_last=subtract_last, verbose=verbose, **kwargs)
            self.model_res = PatchTST_backbone(c_in=c_in, context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride,
                                  max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model,
                                  n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout,
                                  dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var,
                                  attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn,
                                  pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch,
                                  pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine,
                                  subtract_last=subtract_last, verbose=verbose, **kwargs)
        else:
            self.model = PatchTST_backbone(c_in=c_in, context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride,
                                  max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model,
                                  n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout,
                                  dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var,
                                  attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn,
                                  pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch,
                                  pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine,
                                  subtract_last=subtract_last, verbose=verbose, **kwargs)

        self.datetime_linear = nn.Linear(configs.datetime_features, configs.enc_in)
        # classification
        self.dropout = nn.Dropout(configs.dropout)
        self.projection = nn.Linear(configs.enc_in, configs.num_class)

    def forward(self, x, x_mark_enc):           # x: [Batch, Input length, Channel]
        if self.decomposition:
            res_init, trend_init = self.decomp_module(x)
            res_init, trend_init = res_init.permute(0,2,1), trend_init.permute(0,2,1)  # x: [Batch, Channel, Input length]
            res = self.model_res(res_init)
            trend = self.model_trend(trend_init)
            x = res + trend
            x = x.permute(0,2,1)    # x: [Batch, Input length, Channel]
        else:
            time_embeds = self.datetime_linear(x_mark_enc)
            #print(f"x shape: {x.shape} | time_embeds: {time_embeds.shape}")
            enc = x * time_embeds
            enc = enc.permute(0,2,1)    # x: [Batch, Channel, Input length]
            enc = self.model(enc)
            enc = enc.permute(0,2,1)    # x: [Batch, Input length, Channel]
            #print(f"enc shape: {enc.shape}")
            enc = enc.squeeze(1)
            enc = self.dropout(enc)
            enc = self.projection(enc)
        return enc

In [89]:
def compute_direction(tensor):
  batch_size = tensor.size(0)
  comparison = tensor[:, 1, 0] > tensor[:, 0, 0]
  # Convert the comparison result to a one-hot vector
  one_hot = torch.zeros(batch_size, 2)
  one_hot[comparison, 0] = 1  # [1, 0] when the last value is greater
  one_hot[~comparison, 1] = 1  # [0, 1] when the first value is greater
  return one_hot

tensor = torch.randn(5, 2, 1)
print(tensor)
compute_direction(tensor)

tensor([[[ 1.1504],
         [ 0.9073]],

        [[-0.6343],
         [ 1.3786]],

        [[-0.2978],
         [-0.3395]],

        [[ 0.6772],
         [ 1.5184]],

        [[-0.4346],
         [-0.8706]]])


tensor([[0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.]])

# Prediction Test

In [90]:
from dataclasses import dataclass
@dataclass
class Args():
    freq: str = 'd'
    task_name: str = 'classification'
    num_class: int = 2
    seq_len: int = 36
    label_len: int = 18
    pred_len: int = 1
    e_layers: int = 2
    d_layers: int = 1
    n_heads: int = 16
    top_k: int = 5
    factor: int = 1
    enc_in: int = 32
    dec_in: int = 32
    c_out: int = 1
    d_model: int = 128
    d_ff: int = 512
    patch_len: int = 16
    moving_avg: int = 25
    factor: int = 3
    distil: bool = True
    output_attention: bool = False
    patience: int = 400
    stride: int = 1
    learning_rate: float = 0.0005
    batch_size: int = 32
    embed: str = 'timeF'
    activation: str = 'gelu'
    dropout: float = 0.0
    loss: str = 'mse'
    data: str = 'custom'
    features: str = 'MS'
    train_epochs: int = 100
    use_statistic: bool = False
    mask_rate: float = 0.25
    anomaly_ratio: float = 0.25
    num_kernels: int = 6
    moving_avg: int = 25
    activation: str = 'gelu'
    fc_dropout: float = 0.3
    head_dropout: float = 0.3
    momentum: float = 0.1
    dp_rank: int = 8
    merge_size: int = 2
    alpha: float = 0.5
    beta: float = 0.5
    individual: int = 0
    padding_patch: str = 'end'
    revin: int = 1
    affine: int = 0
    subtract_last: int = 0
    decomposition: int = 0
    kernel_size: int = 25

    ## Data
    batch_size: int = 32
    data_start_year: int = 1990
    data_end_year: int = 2023
    one_hot_datetime: bool = False
    datetime_features: int = 3

    ## Training
    run_name: str = "test"
    validation_years: int = 1
    test_years: int = 1
    ticker: str = "^SPX"
    rolling_window: int = 10 # How many training years to be included in each training dataset
    window_epoch: int = 50 # How many epochs to train per dataset
    reset_model: bool = False
    save_folder: str = "patchTST_rolling"

In [91]:
configs = Args()
patchTST = Model(configs)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_optim = torch.optim.Adam(patchTST.parameters(), lr=configs.learning_rate)
loss_fn = nn.BCEWithLogitsLoss()
dataset = StockDataset(tickers='^SPX',timeenc=1, freq='d', size=[36, 18, 1], features='MS')

Loading following tickers: ['^SPX']

Dataset Start Year: 1990 | End Year: 2023
years: [1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
 2017 2018 2019 2020 2021 2022 2023]
DateTime is one-hot: False


In [92]:
device = "cuda" if torch.cuda.is_available() else "cpu"
range_limit = configs.rolling_window+configs.validation_years+configs.test_years
loop_range = tqdm.tqdm(range(len(dataset)-range_limit))
patchTST = patchTST.to(device)
for iteration in loop_range:
  for window_iteration in range(configs.window_epoch):
    epoch_loss = []
    total_hits = 0
    total_data = 0
    total_ol = 0
    starting_idx = iteration // 10
    for i in range((configs.rolling_window+configs.validation_years+configs.test_years)):
        validation, test = False, False
        if i == configs.rolling_window:
          validation = True
        elif i == (configs.rolling_window+1):
          test = True
        else:
          batches, year  = dataset[i+iteration]
          batches_x, batches_y, batches_x_mark, batches_y_mark = batches[0], batches[1], batches[2], batches[3]
          for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(zip(batches_x, batches_y, batches_x_mark, batches_y_mark)):
            model_optim.zero_grad()
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)
            #print(batch_x.shape)

            outputs = patchTST(batch_x, batch_x_mark)
            f_dim = -1 if configs.features == 'MS' else 0
            batch_y_direction = batch_y[:, -(configs.pred_len+1):, f_dim:].to(device)
            truth_direction = compute_direction(batch_y_direction).to(device)
            print(f"outputs: {outputs.shape} | truth_direction: {truth_direction.shape}")
            loss = loss_fn(outputs, truth_direction)
            epoch_loss.append(loss.item())
            loss.backward()
            model_optim.step()

            p = outputs.detach().cpu().numpy().argmax(axis=1)
            l = truth_direction.detach().cpu().numpy().argmax(axis=1)
            only_long = np.zeros_like(l)
            correct_preds = np.sum(p == l)
            only_longs = np.sum(only_long == l)

            total_hits += correct_preds
            total_data += outputs.size(0)
            total_ol += only_longs
        print(f"Year: {year} total_comparisons: {total_data} training_hit_ratio: {total_hits/total_data} | ol_hit_ratio: {total_ol/total_data}")
        break
    break
  break

  0%|          | 0/22 [00:00<?, ?it/s]

outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: t

  0%|          | 0/22 [00:00<?, ?it/s]

outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([5, 2]) | truth_direction: torch.Size([5, 2])
outputs: torch.Size([3, 2]) | truth_direction: torch.Size([3, 2])
Year: 1990 total_comparisons: 253 training_hit_ratio: 0.49407114624505927 | ol_hit_ratio: 0.5335968379446641





# Trainer

In [93]:
class Trainer():
  def __init__(self, configs):
    self.configs = configs

    ticker_str = configs.tickers.replace(" ", "_")
    self.run_name = f"[{ticker_str}]_valYrs:{self.configs.validation_years}_testYrs{self.configs.test_years}_reset:{self.configs.reset_model}_{configs.run_name}"
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.set_seed(configs.seed)
    self.dataset = self.make_data()
    self.model = Model(configs)
    self.model.to(self.device)
    self.loss_fn = nn.MSELoss()
    self.model_optim = torch.optim.Adam(self.model.parameters(), lr=configs.learning_rate)
    self.writer = SummaryWriter(f"/content/drive/MyDrive/code/fintransformer/runs/{self.run_name}")
    path = f"/content/drive/MyDrive/code/fintransformer/models/{self.configs.save_folder}/{self.run_name}"
    save_folder = f"/content/drive/MyDrive/code/fintransformer/models/{self.configs.save_folder}"
    if not os.path.exists(save_folder):
      os.mkdir(save_folder)
      print(f"Save Folder Made at {save_folder}")
    if not os.path.exists(path):
      os.mkdir(path)
      print(f"Save File Directory Made at {path}\n")
    else:
      print(f"Directory Already Exists at {path}")

  def set_seed(self, seed):
      torch.manual_seed(seed)
      torch.cuda.manual_seed(seed)
      #torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
      np.random.seed(seed)
      random.seed(seed)
      torch.backends.cudnn.deterministic = True
      torch.backends.cudnn.benchmark = False

  def make_data(self):
    def collate_fn(batch):
      seq_x, seq_y, seq_x_mark, seq_y_mark, year = batch[0]
      return seq_x, seq_y, seq_x_mark, seq_y_mark, year
    dataset = StockDataset(tickers=self.configs.tickers, timeenc=1, freq='d', size=[self.configs.seq_len, self.configs.label_len, self.configs.pred_len],
                           features=self.configs.features, batch_size=self.configs.batch_size, one_hot_datetime=self.configs.one_hot_datetime,
                           data_start_year=self.configs.data_start_year, data_end_year=self.configs.data_end_year)
    """prices, labels, year = next(iter(val_loader))
    feature_num = prices.shape[2]
    label_num = labels.shape[1]
    print(f"Data shape: {prices.shape} | Num Features: {feature_num}")"""
    return dataset

  def run(self):
    ### Training ###
    print(f"Training model: {self.configs.run_name}")
    print()
    range_limit = self.configs.rolling_window + self.configs.validation_years + self.configs.test_years
    loop_range = tqdm.tqdm(range(len(self.dataset)-range_limit))
    recent_save_path = ""
    val_acc_arr = []
    test_acc_arr = []
    for iteration in loop_range:
      highest_acc = 0.0
      highest_test_acc = 0.0
      if self.configs.reset_model and iteration != 0:
        self.model = Model(self.configs)
        self.model.to(self.device)
        self.model_optim = torch.optim.Adam(self.model.parameters(), lr=self.configs.learning_rate)
        print(f"Model Has Been Reset to Random Parameters")
      else:
        self.load_model(recent_save_path) if recent_save_path != "" else None
      for window_iteration in range(self.configs.window_epoch):
        train_loss, average_val_loss, average_val_accuracy, average_test_accuracy, validation_year, test_year = self.train(iteration, window_iteration, highest_acc)
        if average_val_accuracy > highest_acc:
            highest_acc = average_val_accuracy
            highest_test_acc = average_test_accuracy
            recent_save_path = self.save_model(average_val_accuracy, average_test_accuracy, window_iteration, validation_year, test_year)
      #self.writer.add_scalar("Train/Test Loss", train_loss, iteration)
      val_acc_arr.append([highest_acc, validation_year])
      test_acc_arr.append([highest_test_acc, test_year])
      print(f"***********************************************************")
      print(f"Highest Validation Accuracy in {validation_year[0]}~{validation_year[-1]}: {highest_acc*100:.2f}% | Highest Test Accuracy in {test_year}: {average_test_accuracy*100:.2f}%")
      print(f"***********************************************************")
      print("=============================New Training Set====================================")
    self.writer.flush()
    self.writer.close()
    #print(f"Highest Accuracy in Validation Set: {highest_acc*100:.2f}")
    print()
    for val, test in zip(val_acc_arr, test_acc_arr):
      print(f"Validation Accuracy in {val[1][0]}~{val[1][-1]} : {val[0]*100:.2f}% | Test Accuracy in {test[1]}: {test[0]*100:.2f}%")

    average_first_elements_val = sum(item[0] for item in val_acc_arr) / len(val_acc_arr)
    average_first_elements_test = sum(item[0] for item in test_acc_arr) / len(test_acc_arr)
    print(f"Average Validation Accuracy: {average_first_elements_val*100:.2f}% | Average Test Accuracy: {average_first_elements_test*100:.2f}%")
    return

  def train(self, iteration, window_iteration, highest_acc):
    epoch_loss = []
    val_epoch_loss = []
    training_start_year, training_end_year = 0, 0
    training_total_hits = 0
    training_total_data = 0
    training_total_ol = 0
    validation_year, test_year = [], 0
    validation_total_hits = 0
    validation_total_data = 0
    validation_total_ol = 0
    test_total_hits = 0
    test_total_data = 0
    test_total_ol = 0
    for i in range((self.configs.rolling_window+self.configs.validation_years+self.configs.test_years)):
      validation, test = False, False
      if i >= self.configs.rolling_window and i <= (self.configs.rolling_window+self.configs.validation_years-1):
        batches, year = self.dataset[i+iteration]
        #print(f"validation year: {year}")
        batches_x, batches_y, batches_x_mark, batches_y_mark = batches[0], batches[1], batches[2], batches[3]
        accuracy, only_long, val_loss_epoch, total_hits, total_data, total_ol = self.eval_once(batches_x, batches_y, batches_x_mark, batches_y_mark, year, highest_acc=0, train_acc=0)
        validation_total_hits += total_hits
        validation_total_data += total_data
        validation_total_ol += total_ol
        val_epoch_loss.append(val_loss_epoch)
        validation_year.append(year)
        validation = True
        print(f"Year: {year} | Validation Accuracy: {(total_hits/total_data)*100:.2f}% | Only Long Accuracy: {(total_ol/total_data)*100:.2f}% | Highest Validation Accuracy: {highest_acc*100:.2f}%")
      elif i >= (self.configs.rolling_window+self.configs.validation_years):
        batches, year = self.dataset[i+iteration]
        #print(f"test year: {year}")
        batches_x, batches_y, batches_x_mark, batches_y_mark = batches[0], batches[1], batches[2], batches[3]
        accuracy, only_long,  total_hits, total_data, total_ol = self.test(batches_x, batches_y, batches_x_mark, batches_y_mark, year)
        test_total_hits += total_hits
        test_total_data += total_data
        test_total_ol += total_ol
        test_year = year
        test = True
      else:
        self.model.train()
        batches, year = self.dataset[i+iteration]
        training_start_year = year if i == 0 else training_start_year
        training_end_year = year if i == self.configs.rolling_window-1 else training_end_year
        #print(f"train year: {year}")
        batches_x, batches_y, batches_x_mark, batches_y_mark = batches[0], batches[1], batches[2], batches[3]
        for j, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(zip(batches_x, batches_y, batches_x_mark, batches_y_mark)):
          self.model_optim.zero_grad()

          batch_x = batch_x.float().to(self.device)
          batch_y = batch_y.float().to(self.device)
          true_batch_y = batch_y.float().to(self.device)
          batch_x_mark = batch_x_mark.float().to(self.device)
          batch_y_mark = batch_y_mark.float().to(self.device)
          #print(f"batch_x: {batch_x.shape} batch_y: {batch_y.shape} batch_x_mark: {batch_x_mark.shape} batch_y_mark: {batch_y_mark.shape}")
          dec_inp = torch.zeros_like(true_batch_y[:, -configs.pred_len:, :]).float()
          dec_inp = torch.cat([true_batch_y[:, :configs.label_len, :], dec_inp], dim=1).float().to(self.device)
          #print(batch_x.shape)

          #print(f"batch_x: {batch_x.shape} batch_y: {batch_y.shape} batch_x_mark: {batch_x_mark.shape} batch_y_mark: {batch_y_mark.shape} dec_inp: {dec_inp.shape}")
          outputs = self.model(batch_x, batch_x_mark)

          f_dim = -1 if self.configs.features == 'MS' else 0
          batch_y_direction = batch_y[:, -(self.configs.pred_len+1):, f_dim:].to(self.device)
          truth_direction = compute_direction(batch_y_direction).to(self.device)
          #print(f"outputs: {outputs.shape} | truth_direction: {truth_direction.shape}")
          loss = self.loss_fn(outputs, truth_direction)
          epoch_loss.append(loss.item())
          loss.backward()
          self.model_optim.step()

          p = outputs.detach().cpu().numpy().argmax(axis=1)
          l = truth_direction.detach().cpu().numpy().argmax(axis=1)
          only_long = np.zeros_like(l)
          correct_preds = np.sum(p == l)
          only_longs = np.sum(only_long == l)

          training_total_hits += correct_preds
          training_total_data += outputs.size(0)
          training_total_ol += only_longs

    loss_epoch = np.mean(epoch_loss)
    average_val_accuracy = validation_total_hits / validation_total_data
    average_val_only_long = validation_total_ol / validation_total_data
    average_val_loss = np.mean(val_epoch_loss)
    average_test_accuracy = test_total_hits / test_total_data
    average_test_only_long = test_total_ol / test_total_data
    train_accuracy = training_total_hits / training_total_data
    train_only_long = training_total_ol / training_total_data
    print(f"Training Years: {training_start_year}~{training_end_year} | Training Accuracy: {train_accuracy*100:.2f}% | Training OL Accuracy: {train_only_long*100:.2f}%")
    print(f"Year: {validation_year[0]}~{validation_year[-1]} Validation Accuracy: {average_val_accuracy*100:.2f}% | Only Long Accuracy: {average_val_only_long*100:.2f}% | Highest Validation Accuracy: {highest_acc*100:.2f}%")
    print(f"Year: {test_year} | Test Accuracy: {average_test_accuracy*100:.2f}% | Only Long Accuracy: {average_test_only_long*100:.2f}%")
    print(f"------------------------------------------------")
    self.writer.add_scalar(f"Train:{training_start_year}~{training_end_year}/Train Accuracy", train_accuracy, window_iteration)
    self.writer.add_scalar(f"Train:{training_start_year}~{training_end_year}/Train Loss", loss_epoch, window_iteration)
    self.writer.add_scalar(f"Train:{training_start_year}~{training_end_year}/Validation Loss", average_val_loss, window_iteration)
    self.writer.add_scalar(f"Train:{training_start_year}~{training_end_year}/Validation Accuracy", average_val_accuracy, window_iteration)
    self.writer.add_scalar(f"Train:{training_start_year}~{training_end_year}/Test Accuracy", average_test_accuracy, window_iteration)
      #print(f"Year: {year} training_hit_ratio: {hit_count/batch_x.shape[0]} | ol_hit_ratio: {only_long/batch_x.shape[0]}")

    #print(f"train accuracy: {accuracy*100:.2f}% | only_long: {only_long*100:.2f}%")
    return loss_epoch, average_val_loss, average_val_accuracy, average_test_accuracy, validation_year, test_year

  def eval_once(self, batches_x, batches_y, batches_x_mark, batches_y_mark, year, highest_acc, train_acc, last=False):
    self.model.eval()
    epoch_loss = []
    total_hits = 0
    total_data = 0
    total_ol = 0
    with torch.no_grad():
      for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(zip(batches_x, batches_y, batches_x_mark, batches_y_mark)):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)
        true_batch_y = batch_y.float().to(self.device)
        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)
        #print(f"batch_x: {batch_x.shape} batch_y: {batch_y.shape} batch_x_mark: {batch_x_mark.shape} batch_y_mark: {batch_y_mark.shape}")
        dec_inp = torch.zeros_like(true_batch_y[:, -configs.pred_len:, :]).float()
        dec_inp = torch.cat([true_batch_y[:, :configs.label_len, :], dec_inp], dim=1).float().to(self.device)
        #print(batch_x.shape)

        outputs = self.model(batch_x, batch_x_mark)

        f_dim = -1 if self.configs.features == 'MS' else 0
        #print(f"outputs: {outputs.shape} | batch_Y: {batch_y.shape}")
        batch_y_direction = batch_y[:, -(configs.pred_len+1):, f_dim:].to(self.device)
        truth_direction = compute_direction(batch_y_direction).to(self.device)


        loss = self.loss_fn(outputs, truth_direction)
        epoch_loss.append(loss.item())

        p = outputs.detach().cpu().numpy().argmax(axis=1)
        l = truth_direction.detach().cpu().numpy().argmax(axis=1)
        only_long = np.zeros_like(l)
        correct_preds = np.sum(p == l)
        only_longs = np.sum(only_long == l)

        total_hits += correct_preds
        total_data += outputs.size(0)
        total_ol += only_longs
        #print(f"Year: {year} hit_ratio: {(hit_count/count)*100:.2f}% | ol_hit_ratio: {(only_long/count)*100:.2f}%")

    accuracy = total_hits / total_data
    long_strategy = total_ol / total_data
    loss_epoch = np.mean(epoch_loss)
    #print(f"validation accuracy: {accuracy*100:.2f}% | val highest_acc: {highest_acc*100:.2f}% | val only long strategy: {long_strategy*100:.2f}%")
    return accuracy, long_strategy, loss_epoch, total_hits, total_data, total_ol

  def test(self, batches_x, batches_y, batches_x_mark, batches_y_mark, year):
    self.model.eval()
    total_hits = 0
    total_data = 0
    total_ol = 0
    with torch.no_grad():
      for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(zip(batches_x, batches_y, batches_x_mark, batches_y_mark)):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)
        true_batch_y = batch_y.float().to(self.device)
        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)
        #print(f"batch_x: {batch_x.shape} batch_y: {batch_y.shape} batch_x_mark: {batch_x_mark.shape} batch_y_mark: {batch_y_mark.shape}")
        dec_inp = torch.zeros_like(true_batch_y[:, -configs.pred_len:, :]).float()
        dec_inp = torch.cat([true_batch_y[:, :configs.label_len, :], dec_inp], dim=1).float().to(self.device)
        #print(batch_x.shape)

        outputs = self.model(batch_x, batch_x_mark)

        f_dim = -1 if self.configs.features == 'MS' else 0
        #print(f"outputs: {outputs.shape} | batch_Y: {batch_y.shape}")
        batch_y_direction = batch_y[:, -(configs.pred_len+1):, f_dim:].to(self.device)
        truth_direction = compute_direction(batch_y_direction).to(self.device)

        p = outputs.detach().cpu().numpy().argmax(axis=1)
        l = truth_direction.detach().cpu().numpy().argmax(axis=1)
        only_long = np.zeros_like(l)
        correct_preds = np.sum(p == l)
        only_longs = np.sum(only_long == l)

        total_hits += correct_preds
        total_data += outputs.size(0)
        total_ol += only_longs
        #print(f"Year: {year} hit_ratio: {(hit_count/count)*100:.2f}% | ol_hit_ratio: {(only_long/count)*100:.2f}%")

    accuracy = total_hits / total_data
    long_strategy = total_ol / total_data
    #print(f"test accuracy: {accuracy*100:.2f}% | test only long strategy: {long_strategy*100:.2f}%")
    return accuracy, long_strategy, total_hits, total_data, total_ol

  def save_model(self, val_acc, test_acc, epoch, validation_year, test_year):
    PATH = f"/content/drive/MyDrive/code/fintransformer/models/{self.configs.save_folder}/{self.run_name}/reset:{self.configs.reset_model}_valYear:{validation_year}_testYear:{test_year}.pt"
    torch.save({
            'model_state_dict': self.model.state_dict(),
            'model_optimizer_state_dict': self.model_optim.state_dict(),
            'val_acc': val_acc,
            'validation_year': validation_year,
            'test_acc': test_acc,
            'test_year': test_year,
            'configs': self.configs}, PATH)
    print(f"Model Saved at {PATH}")
    return PATH

  def load_model(self, path):
    checkpoint = torch.load(path, map_location=torch.device(self.device))
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.model_optim.load_state_dict(checkpoint['model_optimizer_state_dict'])
    print(f"Model loaded from {path}")
    print(f"Model Validation Accuracy: {checkpoint['val_acc']*100:.2f}% | Test Accuracy: {checkpoint['test_acc']*100:.2f}%")
    try:
      self.configs = checkpoint['configs']
    except:
      print("No Configs Found in torch file")
    return

In [96]:
from dataclasses import dataclass
@dataclass
class Args():
    freq: str = 'd'
    task_name: str = 'classification'
    num_class: int = 2
    seq_len: int = 36
    label_len: int = 18
    pred_len: int = 1
    e_layers: int = 2
    d_layers: int = 1
    n_heads: int = 16
    top_k: int = 5
    factor: int = 1
    enc_in: int = 32
    dec_in: int = 32
    c_out: int = 1
    d_model: int = 128
    d_ff: int = 512
    patch_len: int = 16
    moving_avg: int = 25
    factor: int = 3
    distil: bool = True
    output_attention: bool = False
    patience: int = 400
    stride: int = 1
    learning_rate: float = 0.0005
    batch_size: int = 32
    embed: str = 'timeF'
    activation: str = 'gelu'
    dropout: float = 0.0
    loss: str = 'mse'
    data: str = 'custom'
    features: str = 'MS'
    train_epochs: int = 100
    use_statistic: bool = False
    mask_rate: float = 0.25
    anomaly_ratio: float = 0.25
    num_kernels: int = 6
    moving_avg: int = 25
    activation: str = 'gelu'
    fc_dropout: float = 0.3
    head_dropout: float = 0.3
    momentum: float = 0.1
    dp_rank: int = 8
    merge_size: int = 2
    alpha: float = 0.5
    beta: float = 0.5
    individual: int = 0
    padding_patch: str = 'end'
    revin: int = 1
    affine: int = 0
    subtract_last: int = 0
    decomposition: int = 0
    kernel_size: int = 25

    ## Data
    batch_size: int = 32
    data_start_year: int = 1990
    data_end_year: int = 2023
    one_hot_datetime: bool = False
    datetime_features: int = 3

    ## Training
    run_name: str = "scale_run3"
    seed: int = 2024
    validation_years: int = 2
    test_years: int = 1
    tickers: str = "^SPX" #"goog amzn wmt xom brk-a lly ge lin pld aapl nee"
    rolling_window: int = 10 # How many training years to be included in each training dataset
    window_epoch: int = 50 # How many epochs to train per dataset
    reset_model: bool = False
    save_folder: str = "patchTST_classification"

In [97]:
configs = Args()
trainer = Trainer(configs)

Loading following tickers: ['^SPX']

Dataset Start Year: 1990 | End Year: 2023
years: [1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
 2017 2018 2019 2020 2021 2022 2023]
DateTime is one-hot: False
Save File Directory Made at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3



In [98]:
trainer.run()

Training model: scale_run3



  0%|          | 0/21 [00:00<?, ?it/s]

Year: 2000 | Validation Accuracy: 43.65% | Only Long Accuracy: 47.62% | Highest Validation Accuracy: 0.00%
Year: 2001 | Validation Accuracy: 45.16% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 0.00%
Training Years: 1990~1999 | Training Accuracy: 48.93% | Training OL Accuracy: 53.60%
Year: 2000~2001 Validation Accuracy: 44.40% | Only Long Accuracy: 47.80% | Highest Validation Accuracy: 0.00%
Year: 2002 | Test Accuracy: 45.24% | Only Long Accuracy: 44.44%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2000, 2001]_testYear:2002.pt
Year: 2000 | Validation Accuracy: 47.62% | Only Long Accuracy: 47.62% | Highest Validation Accuracy: 44.40%
Year: 2001 | Validation Accuracy: 47.98% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 44.40%
Training Years: 1990~1999 | Training Accuracy: 51.07% | Training OL Accurac

  5%|▍         | 1/21 [01:29<29:46, 89.31s/it]

Year: 2000 | Validation Accuracy: 51.98% | Only Long Accuracy: 47.62% | Highest Validation Accuracy: 55.40%
Year: 2001 | Validation Accuracy: 51.61% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 55.40%
Training Years: 1990~1999 | Training Accuracy: 98.85% | Training OL Accuracy: 53.60%
Year: 2000~2001 Validation Accuracy: 51.80% | Only Long Accuracy: 47.80% | Highest Validation Accuracy: 55.40%
Year: 2002 | Test Accuracy: 49.60% | Only Long Accuracy: 44.44%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2000~2001: 55.40% | Highest Test Accuracy in 2002: 49.60%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2000, 2001]_testYear:2002.pt
Model Validation Accuracy: 55.40% | Test Accuracy: 48.81%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2001 | Validation Accuracy: 53.23% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 0.00%
Year: 2002 | Validation Accuracy: 48.81% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 0.00%
Training Years: 1991~2000 | Training Accuracy: 89.39% | Training OL Accuracy: 53.03%
Year: 2001~2002 Validation Accuracy: 51.00% | Only Long Accuracy: 46.20% | Highest Validation Accuracy: 0.00%
Year: 2003 | Test Accuracy: 51.59% | Only Long Accuracy: 54.37%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2001, 2002]_testYear:2003.pt
Year: 2001 | Validation Accuracy: 55.24% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 51.00%
Year: 2002 | Validation Accuracy: 51.98% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 51.00%
Training Years: 1991~2000 | Training Accuracy: 90.70% | Training OL Accurac

 10%|▉         | 2/21 [03:03<29:14, 92.33s/it]

Year: 2001 | Validation Accuracy: 49.19% | Only Long Accuracy: 47.98% | Highest Validation Accuracy: 55.00%
Year: 2002 | Validation Accuracy: 47.22% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 55.00%
Training Years: 1991~2000 | Training Accuracy: 99.64% | Training OL Accuracy: 53.03%
Year: 2001~2002 Validation Accuracy: 48.20% | Only Long Accuracy: 46.20% | Highest Validation Accuracy: 55.00%
Year: 2003 | Test Accuracy: 53.17% | Only Long Accuracy: 54.37%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2001~2002: 55.00% | Highest Test Accuracy in 2003: 53.17%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2001, 2002]_testYear:2003.pt
Model Validation Accuracy: 55.00% | Test Accuracy: 50.00%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2002 | Validation Accuracy: 54.37% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 0.00%
Year: 2003 | Validation Accuracy: 46.03% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 0.00%
Training Years: 1992~2001 | Training Accuracy: 94.57% | Training OL Accuracy: 52.93%
Year: 2002~2003 Validation Accuracy: 50.20% | Only Long Accuracy: 49.40% | Highest Validation Accuracy: 0.00%
Year: 2004 | Test Accuracy: 45.63% | Only Long Accuracy: 55.56%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2002, 2003]_testYear:2004.pt
Year: 2002 | Validation Accuracy: 48.81% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 50.20%
Year: 2003 | Validation Accuracy: 50.00% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 50.20%
Training Years: 1992~2001 | Training Accuracy: 96.07% | Training OL Accurac

 14%|█▍        | 3/21 [04:37<27:56, 93.15s/it]

Year: 2002 | Validation Accuracy: 52.78% | Only Long Accuracy: 44.44% | Highest Validation Accuracy: 52.78%
Year: 2003 | Validation Accuracy: 51.98% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 52.78%
Training Years: 1992~2001 | Training Accuracy: 99.72% | Training OL Accuracy: 52.93%
Year: 2002~2003 Validation Accuracy: 52.38% | Only Long Accuracy: 49.40% | Highest Validation Accuracy: 52.78%
Year: 2004 | Test Accuracy: 50.79% | Only Long Accuracy: 55.56%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2002~2003: 52.78% | Highest Test Accuracy in 2004: 50.79%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2002, 2003]_testYear:2004.pt
Model Validation Accuracy: 52.78% | Test Accuracy: 47.22%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2003 | Validation Accuracy: 51.59% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 0.00%
Year: 2004 | Validation Accuracy: 47.62% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 0.00%
Training Years: 1993~2002 | Training Accuracy: 94.44% | Training OL Accuracy: 52.26%
Year: 2003~2004 Validation Accuracy: 49.60% | Only Long Accuracy: 54.96% | Highest Validation Accuracy: 0.00%
Year: 2005 | Test Accuracy: 49.60% | Only Long Accuracy: 55.95%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2003, 2004]_testYear:2005.pt
Year: 2003 | Validation Accuracy: 49.21% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 49.60%
Year: 2004 | Validation Accuracy: 51.19% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 49.60%
Training Years: 1993~2002 | Training Accuracy: 97.38% | Training OL Accurac

 19%|█▉        | 4/21 [06:12<26:31, 93.64s/it]

Year: 2003 | Validation Accuracy: 50.40% | Only Long Accuracy: 54.37% | Highest Validation Accuracy: 53.37%
Year: 2004 | Validation Accuracy: 48.81% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 53.37%
Training Years: 1993~2002 | Training Accuracy: 99.92% | Training OL Accuracy: 52.26%
Year: 2003~2004 Validation Accuracy: 49.60% | Only Long Accuracy: 54.96% | Highest Validation Accuracy: 53.37%
Year: 2005 | Test Accuracy: 52.38% | Only Long Accuracy: 55.95%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2003~2004: 53.37% | Highest Test Accuracy in 2005: 52.38%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2003, 2004]_testYear:2005.pt
Model Validation Accuracy: 53.37% | Test Accuracy: 49.60%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2004 | Validation Accuracy: 52.78% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 0.00%
Year: 2005 | Validation Accuracy: 49.60% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 0.00%
Training Years: 1994~2003 | Training Accuracy: 95.00% | Training OL Accuracy: 52.56%
Year: 2004~2005 Validation Accuracy: 51.19% | Only Long Accuracy: 55.75% | Highest Validation Accuracy: 0.00%
Year: 2006 | Test Accuracy: 46.61% | Only Long Accuracy: 56.18%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2004, 2005]_testYear:2006.pt
Year: 2004 | Validation Accuracy: 50.79% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 51.19%
Year: 2005 | Validation Accuracy: 51.19% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 51.19%
Training Years: 1994~2003 | Training Accuracy: 98.17% | Training OL Accurac

 24%|██▍       | 5/21 [07:46<25:00, 93.81s/it]

Year: 2004 | Validation Accuracy: 52.38% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 53.17%
Year: 2005 | Validation Accuracy: 54.37% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 53.17%
Training Years: 1994~2003 | Training Accuracy: 100.00% | Training OL Accuracy: 52.56%
Year: 2004~2005 Validation Accuracy: 53.37% | Only Long Accuracy: 55.75% | Highest Validation Accuracy: 53.17%
Year: 2006 | Test Accuracy: 46.22% | Only Long Accuracy: 56.18%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2004, 2005]_testYear:2006.pt
***********************************************************
Highest Validation Accuracy in 2004~2005: 53.37% | Highest Test Accuracy in 2006: 46.22%
***********************************************************


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2004, 2005]_testYear:2006.pt
Model Validation Accuracy: 53.37% | Test Accuracy: 46.22%
Year: 2005 | Validation Accuracy: 53.17% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 0.00%
Year: 2006 | Validation Accuracy: 50.60% | Only Long Accuracy: 56.18% | Highest Validation Accuracy: 0.00%
Training Years: 1995~2004 | Training Accuracy: 94.68% | Training OL Accuracy: 52.84%
Year: 2005~2006 Validation Accuracy: 51.89% | Only Long Accuracy: 56.06% | Highest Validation Accuracy: 0.00%
Year: 2007 | Test Accuracy: 49.40% | Only Long Accuracy: 54.58%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2005, 2006]_testYear:2007.pt
Year: 2005 | Validation Accuracy: 50.40% | 

 29%|██▊       | 6/21 [09:20<23:28, 93.93s/it]

Year: 2005 | Validation Accuracy: 48.41% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 53.68%
Year: 2006 | Validation Accuracy: 53.78% | Only Long Accuracy: 56.18% | Highest Validation Accuracy: 53.68%
Training Years: 1995~2004 | Training Accuracy: 99.92% | Training OL Accuracy: 52.84%
Year: 2005~2006 Validation Accuracy: 51.09% | Only Long Accuracy: 56.06% | Highest Validation Accuracy: 53.68%
Year: 2007 | Test Accuracy: 46.22% | Only Long Accuracy: 54.58%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2005~2006: 53.68% | Highest Test Accuracy in 2007: 46.22%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2005, 2006]_testYear:2007.pt
Model Validation Accuracy: 53.68% | Test Accuracy: 45.02%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2006 | Validation Accuracy: 50.20% | Only Long Accuracy: 56.18% | Highest Validation Accuracy: 0.00%
Year: 2007 | Validation Accuracy: 44.62% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 0.00%
Training Years: 1996~2005 | Training Accuracy: 94.80% | Training OL Accuracy: 52.24%
Year: 2006~2007 Validation Accuracy: 47.41% | Only Long Accuracy: 55.38% | Highest Validation Accuracy: 0.00%
Year: 2008 | Test Accuracy: 44.27% | Only Long Accuracy: 49.80%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2006, 2007]_testYear:2008.pt
Year: 2006 | Validation Accuracy: 49.40% | Only Long Accuracy: 56.18% | Highest Validation Accuracy: 47.41%
Year: 2007 | Validation Accuracy: 41.83% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 47.41%
Training Years: 1996~2005 | Training Accuracy: 99.44% | Training OL Accurac

 33%|███▎      | 7/21 [10:54<21:56, 94.07s/it]

Year: 2006 | Validation Accuracy: 48.61% | Only Long Accuracy: 56.18% | Highest Validation Accuracy: 50.40%
Year: 2007 | Validation Accuracy: 49.40% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 50.40%
Training Years: 1996~2005 | Training Accuracy: 100.00% | Training OL Accuracy: 52.24%
Year: 2006~2007 Validation Accuracy: 49.00% | Only Long Accuracy: 55.38% | Highest Validation Accuracy: 50.40%
Year: 2008 | Test Accuracy: 49.80% | Only Long Accuracy: 49.80%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2006~2007: 50.40% | Highest Test Accuracy in 2008: 49.80%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2006, 2007]_testYear:2008.pt
Model Validation Accuracy: 50.40% | Test Accuracy: 49.80%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2007 | Validation Accuracy: 45.82% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 0.00%
Year: 2008 | Validation Accuracy: 47.43% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 0.00%
Training Years: 1997~2006 | Training Accuracy: 94.87% | Training OL Accuracy: 52.42%
Year: 2007~2008 Validation Accuracy: 46.63% | Only Long Accuracy: 52.18% | Highest Validation Accuracy: 0.00%
Year: 2009 | Test Accuracy: 50.40% | Only Long Accuracy: 55.56%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2007, 2008]_testYear:2009.pt
Year: 2007 | Validation Accuracy: 46.61% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 46.63%
Year: 2008 | Validation Accuracy: 45.45% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 46.63%
Training Years: 1997~2006 | Training Accuracy: 98.89% | Training OL Accurac

 38%|███▊      | 8/21 [12:29<20:24, 94.17s/it]

Year: 2007 | Validation Accuracy: 45.42% | Only Long Accuracy: 54.58% | Highest Validation Accuracy: 49.21%
Year: 2008 | Validation Accuracy: 45.85% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 49.21%
Training Years: 1997~2006 | Training Accuracy: 99.92% | Training OL Accuracy: 52.42%
Year: 2007~2008 Validation Accuracy: 45.63% | Only Long Accuracy: 52.18% | Highest Validation Accuracy: 49.21%
Year: 2009 | Test Accuracy: 54.76% | Only Long Accuracy: 55.56%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2007~2008: 49.21% | Highest Test Accuracy in 2009: 54.76%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2007, 2008]_testYear:2009.pt
Model Validation Accuracy: 49.21% | Test Accuracy: 50.79%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2008 | Validation Accuracy: 49.01% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 0.00%
Year: 2009 | Validation Accuracy: 50.40% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 0.00%
Training Years: 1998~2007 | Training Accuracy: 94.75% | Training OL Accuracy: 52.35%
Year: 2008~2009 Validation Accuracy: 49.70% | Only Long Accuracy: 52.67% | Highest Validation Accuracy: 0.00%
Year: 2010 | Test Accuracy: 51.19% | Only Long Accuracy: 57.14%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2008, 2009]_testYear:2010.pt
Year: 2008 | Validation Accuracy: 50.20% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 49.70%
Year: 2009 | Validation Accuracy: 48.81% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 49.70%
Training Years: 1998~2007 | Training Accuracy: 99.12% | Training OL Accurac

 43%|████▎     | 9/21 [14:03<18:51, 94.27s/it]

Year: 2008 | Validation Accuracy: 49.41% | Only Long Accuracy: 49.80% | Highest Validation Accuracy: 53.66%
Year: 2009 | Validation Accuracy: 48.81% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 53.66%
Training Years: 1998~2007 | Training Accuracy: 99.92% | Training OL Accuracy: 52.35%
Year: 2008~2009 Validation Accuracy: 49.11% | Only Long Accuracy: 52.67% | Highest Validation Accuracy: 53.66%
Year: 2010 | Test Accuracy: 55.56% | Only Long Accuracy: 57.14%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2008~2009: 53.66% | Highest Test Accuracy in 2010: 55.56%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2008, 2009]_testYear:2010.pt
Model Validation Accuracy: 53.66% | Test Accuracy: 53.97%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2009 | Validation Accuracy: 49.21% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 0.00%
Year: 2010 | Validation Accuracy: 51.98% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 0.00%
Training Years: 1999~2008 | Training Accuracy: 95.15% | Training OL Accuracy: 51.77%
Year: 2009~2010 Validation Accuracy: 50.60% | Only Long Accuracy: 56.35% | Highest Validation Accuracy: 0.00%
Year: 2011 | Test Accuracy: 51.98% | Only Long Accuracy: 54.76%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2009, 2010]_testYear:2011.pt
Year: 2009 | Validation Accuracy: 47.22% | Only Long Accuracy: 55.56% | Highest Validation Accuracy: 50.60%
Year: 2010 | Validation Accuracy: 53.97% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 50.60%
Training Years: 1999~2008 | Training Accuracy: 99.28% | Training OL Accurac

 48%|████▊     | 10/21 [15:38<17:18, 94.37s/it]

Year: 2010 | Validation Accuracy: 53.97% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 54.37%
Training Years: 1999~2008 | Training Accuracy: 100.00% | Training OL Accuracy: 51.77%
Year: 2009~2010 Validation Accuracy: 50.60% | Only Long Accuracy: 56.35% | Highest Validation Accuracy: 54.37%
Year: 2011 | Test Accuracy: 48.81% | Only Long Accuracy: 54.76%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2009~2010: 54.37% | Highest Test Accuracy in 2011: 48.81%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2009, 2010]_testYear:2011.pt
Model Validation Accuracy: 54.37% | Test Accuracy: 46.03%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2010 | Validation Accuracy: 51.19% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 0.00%
Year: 2011 | Validation Accuracy: 46.83% | Only Long Accuracy: 54.76% | Highest Validation Accuracy: 0.00%
Training Years: 2000~2009 | Training Accuracy: 94.91% | Training OL Accuracy: 52.21%
Year: 2010~2011 Validation Accuracy: 49.01% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 0.00%
Year: 2012 | Test Accuracy: 53.20% | Only Long Accuracy: 52.80%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2010, 2011]_testYear:2012.pt
Year: 2010 | Validation Accuracy: 52.78% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 49.01%
Year: 2011 | Validation Accuracy: 46.43% | Only Long Accuracy: 54.76% | Highest Validation Accuracy: 49.01%
Training Years: 2000~2009 | Training Accuracy: 99.40% | Training OL Accurac

 52%|█████▏    | 11/21 [17:13<15:44, 94.45s/it]

Year: 2010 | Validation Accuracy: 51.59% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 54.56%
Year: 2011 | Validation Accuracy: 47.22% | Only Long Accuracy: 54.76% | Highest Validation Accuracy: 54.56%
Training Years: 2000~2009 | Training Accuracy: 99.92% | Training OL Accuracy: 52.21%
Year: 2010~2011 Validation Accuracy: 49.40% | Only Long Accuracy: 55.95% | Highest Validation Accuracy: 54.56%
Year: 2012 | Test Accuracy: 54.80% | Only Long Accuracy: 52.80%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2010~2011: 54.56% | Highest Test Accuracy in 2012: 54.80%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2010, 2011]_testYear:2012.pt
Model Validation Accuracy: 54.56% | Test Accuracy: 51.60%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2011 | Validation Accuracy: 48.41% | Only Long Accuracy: 54.76% | Highest Validation Accuracy: 0.00%
Year: 2012 | Validation Accuracy: 52.00% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 0.00%
Training Years: 2001~2010 | Training Accuracy: 95.43% | Training OL Accuracy: 53.16%
Year: 2011~2012 Validation Accuracy: 50.20% | Only Long Accuracy: 53.78% | Highest Validation Accuracy: 0.00%
Year: 2013 | Test Accuracy: 50.40% | Only Long Accuracy: 58.33%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2011, 2012]_testYear:2013.pt
Year: 2011 | Validation Accuracy: 47.22% | Only Long Accuracy: 54.76% | Highest Validation Accuracy: 50.20%
Year: 2012 | Validation Accuracy: 55.20% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 50.20%
Training Years: 2001~2010 | Training Accuracy: 99.60% | Training OL Accurac

 57%|█████▋    | 12/21 [18:47<14:10, 94.47s/it]

Year: 2012 | Validation Accuracy: 52.00% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 54.58%
Training Years: 2001~2010 | Training Accuracy: 100.00% | Training OL Accuracy: 53.16%
Year: 2011~2012 Validation Accuracy: 51.00% | Only Long Accuracy: 53.78% | Highest Validation Accuracy: 54.58%
Year: 2013 | Test Accuracy: 50.79% | Only Long Accuracy: 58.33%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2011~2012: 54.58% | Highest Test Accuracy in 2013: 50.79%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2011, 2012]_testYear:2013.pt
Model Validation Accuracy: 54.58% | Test Accuracy: 51.19%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2012 | Validation Accuracy: 54.80% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 0.00%
Year: 2013 | Validation Accuracy: 50.79% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 0.00%
Training Years: 2002~2011 | Training Accuracy: 94.76% | Training OL Accuracy: 53.83%
Year: 2012~2013 Validation Accuracy: 52.79% | Only Long Accuracy: 55.58% | Highest Validation Accuracy: 0.00%
Year: 2014 | Test Accuracy: 54.76% | Only Long Accuracy: 57.14%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2012, 2013]_testYear:2014.pt
Year: 2012 | Validation Accuracy: 55.60% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 52.79%
Year: 2013 | Validation Accuracy: 53.17% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 52.79%
Training Years: 2002~2011 | Training Accuracy: 99.56% | Training OL Accurac

 62%|██████▏   | 13/21 [20:21<12:35, 94.46s/it]

Year: 2012 | Validation Accuracy: 54.80% | Only Long Accuracy: 52.80% | Highest Validation Accuracy: 55.98%
Year: 2013 | Validation Accuracy: 53.57% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 55.98%
Training Years: 2002~2011 | Training Accuracy: 100.00% | Training OL Accuracy: 53.83%
Year: 2012~2013 Validation Accuracy: 54.18% | Only Long Accuracy: 55.58% | Highest Validation Accuracy: 55.98%
Year: 2014 | Test Accuracy: 49.21% | Only Long Accuracy: 57.14%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2012~2013: 55.98% | Highest Test Accuracy in 2014: 49.21%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2012, 2013]_testYear:2014.pt
Model Validation Accuracy: 55.98% | Test Accuracy: 51.59%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2013 | Validation Accuracy: 55.16% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 0.00%
Year: 2014 | Validation Accuracy: 52.78% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 0.00%
Training Years: 2003~2012 | Training Accuracy: 95.71% | Training OL Accuracy: 54.67%
Year: 2013~2014 Validation Accuracy: 53.97% | Only Long Accuracy: 57.74% | Highest Validation Accuracy: 0.00%
Year: 2015 | Test Accuracy: 49.60% | Only Long Accuracy: 47.22%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2013, 2014]_testYear:2015.pt
Year: 2013 | Validation Accuracy: 50.40% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 53.97%
Year: 2014 | Validation Accuracy: 53.17% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 53.97%
Training Years: 2003~2012 | Training Accuracy: 99.28% | Training OL Accurac

 67%|██████▋   | 14/21 [21:56<11:00, 94.38s/it]

Year: 2013 | Validation Accuracy: 55.16% | Only Long Accuracy: 58.33% | Highest Validation Accuracy: 54.56%
Year: 2014 | Validation Accuracy: 53.17% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 54.56%
Training Years: 2003~2012 | Training Accuracy: 100.00% | Training OL Accuracy: 54.67%
Year: 2013~2014 Validation Accuracy: 54.17% | Only Long Accuracy: 57.74% | Highest Validation Accuracy: 54.56%
Year: 2015 | Test Accuracy: 52.38% | Only Long Accuracy: 47.22%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2013~2014: 54.56% | Highest Test Accuracy in 2015: 52.38%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2013, 2014]_testYear:2015.pt
Model Validation Accuracy: 54.56% | Test Accuracy: 53.97%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2014 | Validation Accuracy: 52.78% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 0.00%
Year: 2015 | Validation Accuracy: 51.19% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 0.00%
Training Years: 2004~2013 | Training Accuracy: 94.72% | Training OL Accuracy: 55.07%
Year: 2014~2015 Validation Accuracy: 51.98% | Only Long Accuracy: 52.18% | Highest Validation Accuracy: 0.00%
Year: 2016 | Test Accuracy: 49.60% | Only Long Accuracy: 51.98%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2014, 2015]_testYear:2016.pt
Year: 2014 | Validation Accuracy: 52.38% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 51.98%
Year: 2015 | Validation Accuracy: 48.81% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 51.98%
Training Years: 2004~2013 | Training Accuracy: 99.40% | Training OL Accurac

 71%|███████▏  | 15/21 [23:30<09:26, 94.44s/it]

Year: 2014 | Validation Accuracy: 52.38% | Only Long Accuracy: 57.14% | Highest Validation Accuracy: 53.17%
Year: 2015 | Validation Accuracy: 51.59% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 53.17%
Training Years: 2004~2013 | Training Accuracy: 100.00% | Training OL Accuracy: 55.07%
Year: 2014~2015 Validation Accuracy: 51.98% | Only Long Accuracy: 52.18% | Highest Validation Accuracy: 53.17%
Year: 2016 | Test Accuracy: 52.38% | Only Long Accuracy: 51.98%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2014~2015: 53.17% | Highest Test Accuracy in 2016: 52.38%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2014, 2015]_testYear:2016.pt
Model Validation Accuracy: 53.17% | Test Accuracy: 47.62%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2015 | Validation Accuracy: 52.38% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 0.00%
Year: 2016 | Validation Accuracy: 47.62% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 0.00%
Training Years: 2005~2014 | Training Accuracy: 95.51% | Training OL Accuracy: 55.22%
Year: 2015~2016 Validation Accuracy: 50.00% | Only Long Accuracy: 49.60% | Highest Validation Accuracy: 0.00%
Year: 2017 | Test Accuracy: 52.99% | Only Long Accuracy: 56.97%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2015, 2016]_testYear:2017.pt
Year: 2015 | Validation Accuracy: 49.21% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 50.00%
Year: 2016 | Validation Accuracy: 49.60% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 50.00%
Training Years: 2005~2014 | Training Accuracy: 99.36% | Training OL Accurac

 76%|███████▌  | 16/21 [25:05<07:52, 94.41s/it]

Year: 2015 | Validation Accuracy: 49.60% | Only Long Accuracy: 47.22% | Highest Validation Accuracy: 52.98%
Year: 2016 | Validation Accuracy: 48.41% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 52.98%
Training Years: 2005~2014 | Training Accuracy: 99.96% | Training OL Accuracy: 55.22%
Year: 2015~2016 Validation Accuracy: 49.01% | Only Long Accuracy: 49.60% | Highest Validation Accuracy: 52.98%
Year: 2017 | Test Accuracy: 49.00% | Only Long Accuracy: 56.97%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2015~2016: 52.98% | Highest Test Accuracy in 2017: 49.00%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2015, 2016]_testYear:2017.pt
Model Validation Accuracy: 52.98% | Test Accuracy: 52.19%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2016 | Validation Accuracy: 49.21% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 0.00%
Year: 2017 | Validation Accuracy: 51.39% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 0.00%
Training Years: 2006~2015 | Training Accuracy: 94.99% | Training OL Accuracy: 54.35%
Year: 2016~2017 Validation Accuracy: 50.30% | Only Long Accuracy: 54.47% | Highest Validation Accuracy: 0.00%
Year: 2018 | Test Accuracy: 52.19% | Only Long Accuracy: 52.59%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2016, 2017]_testYear:2018.pt
Year: 2016 | Validation Accuracy: 48.02% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 50.30%
Year: 2017 | Validation Accuracy: 49.40% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 50.30%
Training Years: 2006~2015 | Training Accuracy: 99.28% | Training OL Accurac

 81%|████████  | 17/21 [26:39<06:17, 94.43s/it]

Year: 2016 | Validation Accuracy: 44.44% | Only Long Accuracy: 51.98% | Highest Validation Accuracy: 52.49%
Year: 2017 | Validation Accuracy: 48.61% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 52.49%
Training Years: 2006~2015 | Training Accuracy: 99.88% | Training OL Accuracy: 54.35%
Year: 2016~2017 Validation Accuracy: 46.52% | Only Long Accuracy: 54.47% | Highest Validation Accuracy: 52.49%
Year: 2018 | Test Accuracy: 47.41% | Only Long Accuracy: 52.59%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2016~2017: 52.49% | Highest Test Accuracy in 2018: 47.41%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2016, 2017]_testYear:2018.pt
Model Validation Accuracy: 52.49% | Test Accuracy: 49.00%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2017 | Validation Accuracy: 50.60% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 0.00%
Year: 2018 | Validation Accuracy: 44.22% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 0.00%
Training Years: 2007~2016 | Training Accuracy: 95.08% | Training OL Accuracy: 53.93%
Year: 2017~2018 Validation Accuracy: 47.41% | Only Long Accuracy: 54.78% | Highest Validation Accuracy: 0.00%
Year: 2019 | Test Accuracy: 53.17% | Only Long Accuracy: 59.52%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2017, 2018]_testYear:2019.pt
Year: 2017 | Validation Accuracy: 50.60% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 47.41%
Year: 2018 | Validation Accuracy: 47.81% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 47.41%
Training Years: 2007~2016 | Training Accuracy: 99.32% | Training OL Accurac

 86%|████████▌ | 18/21 [28:14<04:43, 94.53s/it]

Year: 2017 | Validation Accuracy: 50.20% | Only Long Accuracy: 56.97% | Highest Validation Accuracy: 53.78%
Year: 2018 | Validation Accuracy: 52.99% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 53.78%
Training Years: 2007~2016 | Training Accuracy: 99.96% | Training OL Accuracy: 53.93%
Year: 2017~2018 Validation Accuracy: 51.59% | Only Long Accuracy: 54.78% | Highest Validation Accuracy: 53.78%
Year: 2019 | Test Accuracy: 49.21% | Only Long Accuracy: 59.52%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2017~2018: 53.78% | Highest Test Accuracy in 2019: 49.21%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2017, 2018]_testYear:2019.pt
Model Validation Accuracy: 53.78% | Test Accuracy: 53.17%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2018 | Validation Accuracy: 50.60% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 0.00%
Year: 2019 | Validation Accuracy: 51.19% | Only Long Accuracy: 59.52% | Highest Validation Accuracy: 0.00%
Training Years: 2008~2017 | Training Accuracy: 95.39% | Training OL Accuracy: 54.17%
Year: 2018~2019 Validation Accuracy: 50.89% | Only Long Accuracy: 56.06% | Highest Validation Accuracy: 0.00%
Year: 2020 | Test Accuracy: 55.73% | Only Long Accuracy: 57.31%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2018, 2019]_testYear:2020.pt
Year: 2018 | Validation Accuracy: 50.60% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 50.89%
Year: 2019 | Validation Accuracy: 54.37% | Only Long Accuracy: 59.52% | Highest Validation Accuracy: 50.89%
Training Years: 2008~2017 | Training Accuracy: 99.48% | Training OL Accurac

 90%|█████████ | 19/21 [29:48<03:09, 94.51s/it]

Year: 2018 | Validation Accuracy: 49.40% | Only Long Accuracy: 52.59% | Highest Validation Accuracy: 55.47%
Year: 2019 | Validation Accuracy: 50.79% | Only Long Accuracy: 59.52% | Highest Validation Accuracy: 55.47%
Training Years: 2008~2017 | Training Accuracy: 100.00% | Training OL Accuracy: 54.17%
Year: 2018~2019 Validation Accuracy: 50.10% | Only Long Accuracy: 56.06% | Highest Validation Accuracy: 55.47%
Year: 2020 | Test Accuracy: 53.75% | Only Long Accuracy: 57.31%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2018~2019: 55.47% | Highest Test Accuracy in 2020: 53.75%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2018, 2019]_testYear:2020.pt
Model Validation Accuracy: 55.47% | Test Accuracy: 55.73%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2019 | Validation Accuracy: 53.97% | Only Long Accuracy: 59.52% | Highest Validation Accuracy: 0.00%
Year: 2020 | Validation Accuracy: 57.31% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 0.00%
Training Years: 2009~2018 | Training Accuracy: 94.91% | Training OL Accuracy: 54.45%
Year: 2019~2020 Validation Accuracy: 55.64% | Only Long Accuracy: 58.42% | Highest Validation Accuracy: 0.00%
Year: 2021 | Test Accuracy: 51.19% | Only Long Accuracy: 56.75%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2019, 2020]_testYear:2021.pt
Year: 2019 | Validation Accuracy: 55.16% | Only Long Accuracy: 59.52% | Highest Validation Accuracy: 55.64%
Year: 2020 | Validation Accuracy: 59.68% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 55.64%
Training Years: 2009~2018 | Training Accuracy: 99.64% | Training OL Accurac

 95%|█████████▌| 20/21 [31:23<01:34, 94.54s/it]

Year: 2020 | Validation Accuracy: 57.31% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 58.61%
Training Years: 2009~2018 | Training Accuracy: 99.96% | Training OL Accuracy: 54.45%
Year: 2019~2020 Validation Accuracy: 55.64% | Only Long Accuracy: 58.42% | Highest Validation Accuracy: 58.61%
Year: 2021 | Test Accuracy: 50.79% | Only Long Accuracy: 56.75%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2019~2020: 58.61% | Highest Test Accuracy in 2021: 50.79%
***********************************************************
Model loaded from /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2019, 2020]_testYear:2021.pt
Model Validation Accuracy: 58.61% | Test Accuracy: 50.79%


  checkpoint = torch.load(path, map_location=torch.device(self.device))


Year: 2020 | Validation Accuracy: 56.92% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 0.00%
Year: 2021 | Validation Accuracy: 53.97% | Only Long Accuracy: 56.75% | Highest Validation Accuracy: 0.00%
Training Years: 2010~2019 | Training Accuracy: 95.27% | Training OL Accuracy: 54.85%
Year: 2020~2021 Validation Accuracy: 55.45% | Only Long Accuracy: 57.03% | Highest Validation Accuracy: 0.00%
Year: 2022 | Test Accuracy: 46.61% | Only Long Accuracy: 43.03%
------------------------------------------------
Model Saved at /content/drive/MyDrive/code/fintransformer/models/patchTST_classification/[^SPX]_valYrs:2_testYrs1_reset:False_scale_run3/reset:False_valYear:[2020, 2021]_testYear:2022.pt
Year: 2020 | Validation Accuracy: 57.31% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 55.45%
Year: 2021 | Validation Accuracy: 51.98% | Only Long Accuracy: 56.75% | Highest Validation Accuracy: 55.45%
Training Years: 2010~2019 | Training Accuracy: 99.05% | Training OL Accurac

100%|██████████| 21/21 [32:57<00:00, 94.19s/it]

Year: 2020 | Validation Accuracy: 57.71% | Only Long Accuracy: 57.31% | Highest Validation Accuracy: 58.02%
Year: 2021 | Validation Accuracy: 51.59% | Only Long Accuracy: 56.75% | Highest Validation Accuracy: 58.02%
Training Years: 2010~2019 | Training Accuracy: 99.96% | Training OL Accuracy: 54.85%
Year: 2020~2021 Validation Accuracy: 54.65% | Only Long Accuracy: 57.03% | Highest Validation Accuracy: 58.02%
Year: 2022 | Test Accuracy: 45.82% | Only Long Accuracy: 43.03%
------------------------------------------------
***********************************************************
Highest Validation Accuracy in 2020~2021: 58.02% | Highest Test Accuracy in 2022: 45.82%
***********************************************************

Validation Accuracy in 2000~2001 : 55.40% | Test Accuracy in 2002: 48.81%
Validation Accuracy in 2001~2002 : 55.00% | Test Accuracy in 2003: 50.00%
Validation Accuracy in 2002~2003 : 52.78% | Test Accuracy in 2004: 47.22%
Validation Accuracy in 2003~2004 : 53.37% |


