In [None]:
#### Parts of the full transformer architecture implementation were adapted from the Imperial College NLP course
# https://github.com/ImperialNLP/NLPLabs-2022/tree/main/transformers_code_answers

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple, defaultdict
import math
import random
import yfinance as yf
from pytorch_lightning import seed_everything
import os
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [None]:
# Seed the NumPy and PyTorch RNGs with a fixed value so runs are repeatable.
np.random.seed(1233)
torch.manual_seed(1233)
# NOTE(review): this repeats the NumPy seeding two lines above; redundant but harmless.
np.random.seed(1233)
# Lightning's helper seeds the RNGs again; workers=True extends it to DataLoader workers.
seed_everything(1233, workers=True)
# cuBLAS workspace setting; presumably needed for the `deterministic=True` Trainer used later.
os.environ["CUBLAS_WORKSPACE_CONFIG"]=":4096:2"

In [None]:
# Use the first GPU only when CUDA is actually usable. `is_available` must be
# *called*: the bare function object is always truthy, which previously
# selected "cuda:0" even on CPU-only machines.
device = ("cuda:0" if torch.cuda.is_available() else "cpu")

#### Model

In [None]:
class AbsolutePositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a batch-first sequence.

    Even feature columns hold sin(pos / 10000^(2i/d_model)) and odd columns
    hold the matching cosine. The table is registered as the buffer ``pe``
    (shape [1, max_seq_len, d_model]) so it follows the module across devices
    and is stored in the state dict.

    Args:
        d_model: size of the feature dimension of the inputs (odd or even).
        dropout: dropout probability applied after adding the encoding.
        max_seq_len: number of positions pre-computed in the table.
        device: device on which the table is initially allocated.
    """

    def __init__(self, d_model, dropout=0.3, max_seq_len=200, device="cpu"):
        super().__init__()
        self.d_model = d_model
        self.dropout = nn.Dropout(dropout)

        pe = torch.zeros(max_seq_len, d_model).to(device)
        pos = torch.arange(0, max_seq_len).unsqueeze(1).float()

        two_i = torch.arange(0, d_model, step=2).float()
        div_term = torch.pow(10000, (two_i/torch.Tensor([d_model]))).float()
        # shape(angles) = [max_seq_len x ceil(d_model / 2)]
        angles = pos/div_term
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns,
        # so the cosine part is trimmed to fit instead of raising a shape error.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        pe = pe.unsqueeze(0)

        self.register_buffer("pe", pe)

    def forward(self, x):
        """Add the encoding for the first ``x.shape[1]`` positions, then apply dropout."""
        # shape(x) = [B x seq_len x D]
        pe = self.pe[:, :x.shape[1]].detach()
        x = x.add(pe)
        # shape(x) = [B x seq_len x D]
        return self.dropout(x)

In [None]:
class Time2VecPositonalEncoding(nn.Module):
    """Learned Time2Vec-style embedding: periodic features concatenated with a linear term.

    The output's last dimension is ``output_dim``: the first
    ``output_dim - input_dim`` columns are sin/cos of a learned linear map,
    the remaining ``input_dim`` columns are a plain learned linear map.

    Args:
        input_dim: size of the last dimension of the input.
        output_dim: size of the last dimension of the output.
        activation: ``"sine"`` or ``"cos"``; selects the periodic function.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    _PERIODIC_FUNCTIONS = {"sine": torch.sin, "cos": torch.cos}

    def __init__(self,input_dim, output_dim, activation="sine"):
        super().__init__()
        # Fail at construction time; previously an unknown name only surfaced
        # as an unbound-variable error inside forward().
        if activation not in self._PERIODIC_FUNCTIONS:
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'sine' or 'cos'"
            )
        self.linear_layer = nn.Linear(input_dim, input_dim)
        self.periodic_layer = nn.Linear(input_dim, output_dim - input_dim)
        self.activation = activation

    def forward(self, x):
        """Embed ``x`` (3-D, features last) by concatenating along dim 2."""
        periodic_fn = self._PERIODIC_FUNCTIONS[self.activation]
        periodic_out = periodic_fn(self.periodic_layer(x))

        original_out = self.linear_layer(x)

        out = torch.cat([periodic_out, original_out], 2)

        return out

In [None]:
class TimeSeriesTransformer(nn.Module):
    """Encoder-only transformer for time-series forecasting.

    The forward pass is: optional input projection -> positional encoding ->
    causally-masked ``nn.TransformerEncoder`` -> linear head producing
    ``steps_ahead`` values per time step.

    ``config`` keys read here: dropout, device, max_seq_len, input_dim,
    d_model, n_head, num_enc_layers, num_dec_layers, steps_ahead,
    use_input_projections, is_classification, use_absolute_enc and (only when
    ``use_absolute_enc`` is false) activation.

    ``transformer``, ``targetProjection``, ``linear`` and ``outputPosEncoding``
    are not used by ``forward``; they are still created so parameter
    initialisation order and state-dict keys stay the same as before.
    """

    def __init__(self, config):
        super().__init__()

        dropout = config["dropout"]
        device = config["device"]
        max_seq_len = config["max_seq_len"]
        self.dropout = nn.Dropout(p=dropout)

        input_dim, d_model = config["input_dim"], config["d_model"]
        self.inputProjection = nn.Linear(input_dim, d_model)
        self.targetProjection = nn.Linear(input_dim, d_model)


        n_head, num_enc_layers, num_dec_layers = config["n_head"], config["num_enc_layers"], config["num_dec_layers"]

        self.transformer = nn.Transformer(d_model, n_head, num_enc_layers, num_dec_layers, 
                dim_feedforward= 4 * d_model, dropout=dropout, batch_first=True, device=device) 


        self.encoderLayer = nn.TransformerEncoderLayer(d_model, n_head, dim_feedforward=4 * d_model, dropout=dropout, batch_first=True)

        self.encoder = nn.TransformerEncoder(self.encoderLayer, num_enc_layers)
        steps_ahead = config["steps_ahead"]

        # Despite the name this is the linear output head, not a transformer decoder.
        self.decoder = nn.Linear(d_model, steps_ahead)

        self.linear = nn.Linear(d_model, steps_ahead)
        self.d_model = d_model

        self.useInputProjection = config["use_input_projections"]

        self.isClassification = config["is_classification"]

        if config["use_absolute_enc"]:
            self.inputPosEncoding = AbsolutePositionalEncoding(d_model, dropout, max_seq_len, device)
            self.outputPosEncoding = AbsolutePositionalEncoding(d_model, dropout, max_seq_len, device)
        else:
            # NOTE(review): Time2Vec maps input_dim -> d_model, so it expects the raw
            # (un-projected) input; combining it with use_input_projections=True
            # looks like a shape mismatch - confirm.
            t2v_activation = config["activation"]
            self.inputPosEncoding = Time2VecPositonalEncoding(input_dim, d_model, t2v_activation)
            self.outputPosEncoding = Time2VecPositonalEncoding(input_dim, d_model, t2v_activation)

        # Xavier-initialise every weight matrix (biases and other 1-D params are skipped).
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def create_mask(self, size):
        """Return a [size x size] causal mask: -inf above the diagonal, 0 elsewhere."""
        return torch.triu(torch.ones(size, size) * float('-inf'), diagonal=1)

    def encoder_only_forward(self, src, src_mask):
        """Run the encoder stack on ``src`` with the given attention mask."""
        # here, src includes prices
        # with teacher forcing
        prediction = self.encoder(src, src_mask)

        return prediction

    def forward(self, src, trg):
        """Predict ``steps_ahead`` values for every position of ``src``.

        Args:
            src: input window, [B x seq_len x input_dim].
            trg: accepted for interface compatibility; the encoder-only path
                does not use it.

        Returns:
            Tensor of shape [B x seq_len x steps_ahead]; log-probabilities over
            the last dimension when ``is_classification`` is set.
        """
        # type_as moves the mask to src's device and dtype.
        src_mask = self.create_mask(src.shape[1])
        src_mask = src_mask.type_as(src)

        if self.useInputProjection:
            src = self.inputProjection(src)

        src = self.inputPosEncoding(src)   # shape: [B * training_len * d_model]
        out = self.encoder_only_forward(src, src_mask)  # shape: [B * training_len * d_model]
        out = self.decoder(out)  # shape: [B * training_len * steps_ahead]

        if self.isClassification:
            # Normalise over the output dimension. Without an explicit dim the
            # implicit choice for a 3-D tensor is dim 0, i.e. across the batch.
            out = F.log_softmax(out, dim=-1)
        return out


In [None]:
def r2_loss(output, target):
    """Loss derived from the coefficient of determination: ``1 - R^2``.

    Returns ``ss_res / ss_tot``, which is 0 for a perfect fit, 1 when the
    prediction is no better than the target mean, and grows as the fit gets
    worse. The previous version returned R^2 itself, so using it as a
    criterion to *minimise* pushed the model towards worse fits.

    Args:
        output: predicted values.
        target: ground-truth values, broadcastable against ``output``.

    Returns:
        Scalar tensor. Not finite when ``target`` is constant (``ss_tot == 0``).
    """
    target_mean = torch.mean(target)
    ss_tot = torch.sum((target - target_mean) ** 2)
    ss_res = torch.sum((target - output) ** 2)
    return ss_res / ss_tot

In [None]:
def flatten(t):
    """Concatenate the items of every sub-iterable in ``t`` into one flat list."""
    flat = []
    for sublist in t:
        for item in sublist:
            flat.append(item)
    return flat

def get_prediction(t):
    """Return each tensor in ``t`` reshaped to 1-D via ``view(-1)``."""
    flat_views = []
    for arr in t:
        flat_views.append(arr.view(-1))
    return flat_views

In [None]:
class TransformerTrainer(pl.LightningModule):
    """Lightning wrapper that trains ``TimeSeriesTransformer`` for n-step forecasting.

    ``config`` keys read here (in addition to those consumed by the model):
    training_len, d_model, warmup_steps, lr, forecast_window, sampling, k, loss.

    The plotting hooks rely on notebook-level globals: ``train_loader_unshuffled``,
    ``val_loader``, ``train_output_img_dir`` and ``train_px_scaler``.
    """

    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()

        self.training_len = config["training_len"]

        config["device"] = self.hparams.config['device']
        self.model = TimeSeriesTransformer(config)

        self.d_model= config["d_model"]
        self.warmup_steps = config["warmup_steps"]

        self.lr = config["lr"]
        self.device_ = self.hparams.config['device']
        self.forecast_window = config["forecast_window"]

        # When True, scheduled sampling mixes ground truth and model output as next input.
        self.sampling = config["sampling"]

        # Decay constant of the scheduled-sampling probability (see n_step_forward).
        self.k = config["k"]

        if config["loss"] == 'mse':
            self.criterion = nn.MSELoss(reduction='sum')
        else:
            self.criterion = r2_loss

    def configure_optimizers(self):
        """Adam over all parameters; the rate is overwritten each step by change_lr_in_optimizer."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def change_lr_in_optimizer(self):
        """Set the learning rate from a warm-up / inverse-square-root schedule.

        lr = d_model^-0.5 * min((step + 1)^-0.5, step * warmup_steps^-1.5).
        This replaces the ``lr`` given in the config; at global step 0 the rate is 0.
        """
        min_arg1 = math.sqrt(1/(self.global_step+1))
        min_arg2 = self.global_step * (self.warmup_steps**-1.5)
        lr = math.sqrt(1/self.d_model) * min(min_arg1, min_arg2)
        self.trainer.lightning_optimizers[0].param_groups[0]['lr'] = lr

    def inference(self, src, trg):
        """Autoregressive rollout that feeds each prediction back as the next input.

        Fixes over the previous version, which raised on the first iteration:
        the last-step prediction ([B x steps_ahead]) is unsqueezed to 3-D before
        being appended, the window is rolled from the *current* input (so earlier
        predictions are kept), and ``forecast_window`` steps are produced to match
        ``n_step_forward``.

        Returns:
            Tensor [B x forecast_window * steps_ahead] of detached predictions,
            or an empty list when ``forecast_window`` is 0.
        """
        next_input = src
        step_preds = []

        for _ in range(self.forecast_window):
            pred = self.model(next_input, trg)
            last_pred = pred[:, -1, :].detach()
            step_preds.append(last_pred)

            # Drop the oldest time step and append the new prediction.
            # assumes a single input feature and steps_ahead == 1, as in n_step_forward - TODO confirm
            next_input = torch.cat((next_input[:, 1:, :].detach(), last_pred.unsqueeze(-1)), dim=1)

        return torch.cat(step_preds, dim=1) if step_preds else []

    def prob_get_true_val(self, p):
        """Return True with probability ``p``."""
        return random.random() < p

    def n_step_forward(self, src, trg):
        """Roll the model forward ``forecast_window`` steps, one prediction per step.

        After every step the input window slides by one position. The appended
        value is the ground-truth ``trg`` step, or - only when ``self.sampling``
        is on and the draw fails - the model's own detached prediction. The
        probability of using ground truth is k / (k + exp(epoch / k)).

        NOTE(review): with ``sampling`` off, ground truth is always fed back.
        ``validation_step`` and ``forward`` (used for prediction) also go through
        this method, so their forecasts are conditioned on true future values.
        Confirm this is intended.

        Returns:
            Tensor [B x forecast_window * steps_ahead] (gradients attached), or
            an empty list when ``forecast_window`` is 0.
        """
        sampled_input = src
        step_preds = []

        for i in range(self.forecast_window):
            pred = self.model(sampled_input, trg)
            pred = pred[:, -1, :]
            step_preds.append(pred)

            use_true_val = True
            if self.sampling:
                p = self.k / (self.k + math.exp(self.current_epoch/self.k))
                use_true_val = self.prob_get_true_val(p)

            if use_true_val:
                # [B x F] -> [B x 1 x F]; identical to the old unsqueeze(-1) for F == 1
                # and also valid when there is more than one feature.
                next_step = trg[:, i, :].unsqueeze(1).detach()
            else:
                # [B x steps_ahead] -> [B x steps_ahead x 1]; only fits single-feature input.
                next_step = pred.unsqueeze(-1).detach()

            sampled_input = torch.cat((sampled_input[:, 1:, :].detach(), next_step), dim=1)

        return torch.cat(step_preds, dim=1) if step_preds else []

    def training_step(self, batch, batch_idx):
        """Compute loss and Pearson correlation for one batch, then update the learning rate."""
        src, trg_in, targets = batch

        y_hat = self.n_step_forward(src, trg_in)
        y = targets

        loss = self.criterion(y_hat, y)

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        pearson_corr = self.calc_pearson_coeff(y_hat, y)

        self.log("train_pearson_coef", pearson_corr ,on_step=True, on_epoch=True, prog_bar=True, logger=True)

        self.change_lr_in_optimizer()

        return {'loss': loss}


    def training_epoch_end(self, outputs):
        """Log the mean training loss and plot training predictions every 5th epoch."""
        avg_loss = torch.stack(
            [x["loss"].detach() for x in outputs]).mean()

        if self.current_epoch % 5 == 0:
            self.print_predictions(train_loader_unshuffled, train_output_img_dir+"train", "Training")

        self.log_dict({"train_loss": avg_loss, 'step': self.current_epoch})

    def print_predictions(self, dataloader, file_prefix, title="Validation"):
        """Plot prediction vs. target for the first sample of the first batch and save it.

        The figure is written to ``{file_prefix}/First_128_preds_Epoch_{epoch}.jpg``;
        values are mapped back to price scale with the global ``train_px_scaler``.
        """
        with torch.no_grad():
            all_predictions = []
            init_train_data = []
            all_targets = []
            for step, trng_data in enumerate(dataloader):
                # Only the first batch is used.
                if step == 1:
                    break

                src, _, targets = trng_data
                all_targets.append(targets[0, :].detach().cpu())

                if step == 0:
                    # First feature column of the first sample's input window.
                    init_train_data = src[0, :, 0].reshape(-1)

                prediction = self(trng_data)
                prediction = prediction[0, :]
                all_predictions.append(prediction.detach().cpu())

            all_predictions = flatten(get_prediction(all_predictions))
            all_predictions = np.array(all_predictions)
            all_targets = flatten(get_prediction(all_targets))
            all_targets = np.array(all_targets)

            # inverse transform
            init_train_data = train_px_scaler.inverse_transform(init_train_data.reshape(-1, 1)).reshape(-1)
            all_predictions = train_px_scaler.inverse_transform(all_predictions.reshape(-1, 1)).reshape(-1)
            all_targets = train_px_scaler.inverse_transform(all_targets.reshape(-1, 1)).reshape(-1)

            end_plot_idx = self.training_len + len(all_predictions)
            plt.figure(figsize=(8, 6))
            plt.plot(init_train_data, label='trailing')
            plt.plot(np.arange(self.training_len, end_plot_idx), all_predictions, label="predicted")
            plt.plot(np.arange(self.training_len, end_plot_idx), all_targets, label="actual")
            plt.title(f"{title} prediction for epoch {self.current_epoch}")
            plt.legend()
            plt.grid()

            if not os.path.exists(file_prefix):
                os.makedirs(file_prefix)

            plt.savefig(f"{file_prefix}/First_128_preds_Epoch_{self.current_epoch}.jpg", bbox_inches="tight")

            plt.close()

    def calc_pearson_coeff(self, y_pred, y):
        """Pearson correlation between the flattened predictions and targets.

        The result is not finite when either input is constant (zero variance).
        """
        predicted = y_pred.contiguous().view(-1)
        targets = y.contiguous().view(-1)
        vy_pred = predicted - torch.mean(predicted)
        vy = targets - torch.mean(targets)
        denom = torch.sum(vy_pred ** 2) * torch.sum(vy ** 2)

        corr = torch.sum(vy_pred * vy) / torch.sqrt(denom)

        return corr


    def validation_step(self, batch, batch_idx):
        """Compute and log validation loss and Pearson correlation for one batch."""
        src, trg_in, targets = batch

        y_hat = self.n_step_forward(src, trg_in)
        y = targets

        loss = self.criterion(y_hat, y)

        # on_epoch=True makes Lightning expose the aggregate as "valid_loss_epoch",
        # the key monitored by the checkpoint and early-stopping callbacks.
        self.log("valid_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        pearson_corr = self.calc_pearson_coeff(y_hat, y)

        self.log("val_pearson_coef", pearson_corr ,on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return {'valid_loss_step': loss}


    def validation_epoch_end(self, outputs):
        """Log the mean validation loss and plot validation predictions every 5th epoch (not epoch 0)."""
        avg_loss = torch.stack(
            [x["valid_loss_step"].detach() for x in outputs]).mean()

        self.log("ptl/val_loss", avg_loss)

        tensorboard_logs = {'val_loss_epoch': avg_loss, 'step': self.current_epoch}

        if self.current_epoch % 5 == 0 and self.current_epoch != 0:
            self.print_predictions(val_loader, train_output_img_dir+"val")

        self.log_dict(tensorboard_logs)

    def forward(self, x):
        """Run the n-step rollout on a ``(src, trg_in, targets)`` batch.

        Inputs are moved to the module's device via the public ``self.device``
        property rather than the private ``_device`` attribute.
        """
        src, trg_in, _ = x
        src = src.to(self.device)
        trg_in = trg_in.to(self.device)
        return self.n_step_forward(src, trg_in)
    

#### Create Sequence Dataset

In [None]:
# Global scale factors applied to the non-price features and the price column.
f_multiplier = 1
target_multiplier = 1
class FeaturesEncoderDataset(Dataset):
    """Sliding-window dataset over a 2-D feature array whose column 0 is the price.

    Sample ``idx`` consists of
      * train_features:  rows [idx, idx + training_len) of ``features``,
      * target_features: the next ``forecast_len`` rows of ``features``,
      * target_values:   the matching ``forecast_len`` entries of ``targets``.

    Args:
        features: array indexable as ``features[a:b]`` and ``[:, j]``.
        targets: 1-D sequence aligned row-by-row with ``features``.
        training_len: number of rows in each input window.
        forecast_len: number of rows in each target window.
    """

    def __init__(self, features, targets, training_len, forecast_len):
        super().__init__()
        self.features = features
        self.targets = targets
        self.training_len = training_len
        self.forecast_len = forecast_len
        self.feature_len = len(features)

    def __len__(self):
        # Clamp at 0: a series shorter than one full window yields an empty
        # dataset instead of an invalid negative length.
        return max(0, self.feature_len - self.training_len - self.forecast_len + 1)

    def __getitem__(self, idx):
        # Without this check out-of-range indices silently returned truncated
        # windows, and plain iteration over the dataset never terminated.
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} is out of range for dataset of length {len(self)}")

        end_trng_idx = idx + self.training_len
        end_target_idx = end_trng_idx + self.forecast_len

        train_data = self.features[idx:end_trng_idx]
        train_features = f_multiplier * train_data[:, 1:]
        train_prices = target_multiplier * np.expand_dims(train_data[:, 0], axis=1)
        # Re-assemble with the (scaled) price back in column 0.
        train_features = np.concatenate((train_prices, train_features), axis=1)

        train_features = torch.as_tensor(train_features, dtype=torch.float32)
        # Target window starts immediately after the last row of the input window.
        target_features = torch.as_tensor(f_multiplier * self.features[end_trng_idx:end_target_idx], dtype=torch.float32)
        target_values = torch.as_tensor(self.targets[end_trng_idx:end_target_idx], dtype=torch.float32)

        return FeaturesAndTarget(train_features, target_features, target_values) 

#### Import Data

In [None]:
# Sample container returned by FeaturesEncoderDataset.__getitem__:
# (input window, target-window features, target-window values).
FeaturesAndTarget = namedtuple('FeaturesAndTarget', ['train_features', 'target_features', 'target_values'])

#### YFinance Data

In [None]:
def download_data(codes, start_date, end_date):
    """Download price history from Yahoo Finance with (field, ticker) columns.

    Args:
        codes: a ticker symbol or a list of ticker symbols.
        start_date: start of the date range, passed to ``yf.download``.
        end_date: end of the date range, passed to ``yf.download``.

    Returns:
        DataFrame with two-level columns, rows containing any NaN removed.
    """
    data = yf.download(codes, start_date, end_date)

    # A bare string would otherwise be measured by its character count.
    code_list = [codes] if isinstance(codes, str) else list(codes)

    # A single-ticker download may come back with flat columns; add the ticker
    # level so callers can always index data[field][ticker]. Skip this when the
    # frame already has two levels.
    if len(code_list) == 1 and not isinstance(data.columns, pd.MultiIndex):
        data.columns = [data.columns, code_list*len(data.columns)]

    return data.dropna()

In [None]:
# Symbols to download, and the one symbol used for the exploratory cells below.
tickers = ["^GSPC", "AAPL", "MSFT", "NKE", "JPM" , "JNJ",  "BTC-USD"]
ticker = "MSFT"

In [None]:
# Download all tickers for the fixed date range and keep only the adjusted close.
daily_data = download_data(tickers, "2018-05-02", "2022-05-01")
close_data = daily_data['Adj Close']


In [None]:
# use adj close as prediction!
prices_data = close_data[ticker].values
prices_data = prices_data.reshape(-1)

plt.plot(prices_data)

In [None]:
def create_features(prices_data):
    """Build the feature frame: the price plus five EMA-difference columns.

    Columns, in order: ``px`` (the input prices) and ``dpx1`` .. ``dpx5``, each
    a fast exponential moving average minus a slow one.

    NOTE(review): ``dpx1``/``dpx2`` parameterise the EMA by ``span`` while
    ``dpx3``..``dpx5`` pass the number as ``com`` (pandas' first positional
    argument). The two are on different scales; confirm the mix is intentional.
    """
    features = pd.DataFrame(dict(px=prices_data))
    px = features["px"]

    def ema_spread(fast, slow):
        # fast / slow are keyword dicts forwarded to Series.ewm
        return px.ewm(**fast).mean() - px.ewm(**slow).mean()

    features["dpx1"] = ema_spread(dict(span=8), dict(span=32))
    features["dpx2"] = ema_spread(dict(span=32), dict(span=64))
    features["dpx3"] = ema_spread(dict(com=64), dict(com=128))
    features["dpx4"] = ema_spread(dict(com=128), dict(com=256))
    features["dpx5"] = ema_spread(dict(com=12), dict(com=26))

    return features

# Feature frame for the ticker selected above.
features = create_features(prices_data)

#### Splitting data

In [None]:
def split_data(features):
    """Split rows chronologically into train (first 60%), validation (next 20%) and test (rest)."""
    n_rows = features.shape[0]
    train_end = int(n_rows*0.6)
    val_end = train_end + int(n_rows * 0.2)

    return features[:train_end], features[train_end:val_end], features[val_end:]

In [None]:
# Chronological train / validation / test split; show the first training rows.
train_features, val_features, test_features = split_data(features)
train_features.head()

#### Parameters

In [None]:
# Single configuration dict shared by preprocessing, the model and the Lightning module.
params = dict(
    # Window lengths (rows): training_len for the train set, val_len for val/test sets.
    training_len=60,
    val_len=60,
    max_seq_len=1000, 
    forecast_window=30,

    # NOTE(review): the DataLoaders are built from a separate module-level
    # `batch_size`; this entry does not appear to be read anywhere visible.
    batch_size=64,
    d_model=128,
    # Initial Adam rate; overwritten every step by the warm-up schedule.
    lr = 1e-3,
    steps_ahead=1, 
    input_dim=1, 
    n_head=8, 

    num_enc_layers=2,
    # Only configures the nn.Transformer member, which the forward pass does not use.
    num_dec_layers=4,

    dropout=0.1,
     
    # True -> sinusoidal AbsolutePositionalEncoding, False -> Time2Vec.
    use_absolute_enc=True, 
    use_input_projections=True, 
    is_classification=False,
    device=device, 
    warmup_steps=4000, 
    # 'mse' selects nn.MSELoss(reduction='sum'); anything else selects r2_loss.
    loss='mse',
    # Time2Vec periodic function; only read when use_absolute_enc is False.
    activation="sine",
    # Read by preprocess_data: apply MinMax scaling / keep only the price column.
    scale=True,
    # Scheduled sampling switch and its decay constant.
    sampling=False,
    k = 30,
    prices_only = True
)

#### Preprocessing data

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
from sklearn.preprocessing import MinMaxScaler
# Scaler for the dpx* feature columns (range -1..1); fitted in preprocess_data.
train_features_scaler = MinMaxScaler(feature_range=(-1, 1))
# Scaler for the price column (range 0..1); also used to map predictions back to prices.
train_px_scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
def preprocess_data(train_features, val_features, test_features):
    """Turn the three feature frames into model-ready arrays.

    Reads the global ``params`` (``scale``, ``prices_only``) and, when scaling
    is on, fits the global ``train_features_scaler`` / ``train_px_scaler`` on
    the training split only and applies them to all three splits.

    Previously ``params['scale'] == False`` left the value arrays undefined and
    the function raised; the raw values are now passed through in that case.
    Note that the plotting/evaluation helpers still call
    ``train_px_scaler.inverse_transform`` and therefore need a fitted scaler.

    Args:
        train_features, val_features, test_features: DataFrames with a ``px``
            column and ``dpx*`` feature columns.

    Returns:
        ``(train_values, val_values, test_values, train_targets, val_targets,
        test_targets)``. Values are 2-D with the price in column 0 (only that
        column when ``prices_only``); targets are the 1-D price column.
    """
    splits = (train_features, val_features, test_features)

    ema_parts = [split.filter(like='dpx').values for split in splits]
    px_parts = [split['px'].values.reshape(-1, 1) for split in splits]

    if params['scale']:
        # Fit on the training split only so no val/test statistics leak in.
        train_features_scaler.fit(ema_parts[0])
        train_px_scaler.fit(px_parts[0])

        ema_parts = [train_features_scaler.transform(ema) for ema in ema_parts]
        px_parts = [train_px_scaler.transform(px) for px in px_parts]

    # Price first, then the EMA-difference features.
    values = [np.concatenate([px, ema], axis=1) for px, ema in zip(px_parts, ema_parts)]

    # The target is always the (possibly scaled) price column.
    targets = [v[:, 0].reshape(-1) for v in values]

    # price only data
    if params["prices_only"]:
        values = [v[:, 0].reshape(-1, 1) for v in values]

    f_train_values, f_val_values, f_test_values = values
    f_train_targets, f_val_targets, f_test_targets = targets

    return f_train_values, f_val_values, f_test_values, f_train_targets, f_val_targets, f_test_targets

In [None]:
# Scale (per params) and unpack value / target arrays for the three splits.
f_train_values, f_val_values, f_test_values, f_train_targets, f_val_targets, f_test_targets = preprocess_data(train_features, val_features, test_features)

In [None]:
# Sanity check: sizes of the test, validation and training target arrays.
print(f_test_targets.shape)
print(f_val_targets.shape)
print(f_train_targets.shape)

In [None]:
# Module-level copies of the window lengths; read as globals by gen_fig and the evaluation cells.
training_len = params['training_len']
forecast_len = params['forecast_window']

In [None]:
import random
def seed_worker(worker_id):
    """DataLoader ``worker_init_fn``: seed NumPy and ``random`` from torch's initial seed.

    ``worker_id`` is required by the DataLoader callback signature and is not used.
    """
    worker_seed = torch.initial_seed() % (1 << 32)
    random.seed(worker_seed)
    np.random.seed(worker_seed)

# Dedicated, seeded generator passed to the shuffling DataLoader.
gen = torch.Generator()
gen.manual_seed(1233)

In [None]:
# NOTE(review): duplicates params['batch_size']; this module-level value is the one the loaders use.
batch_size = 64

# Sliding-window datasets; validation and test both use params['val_len'] as the input length.
train_dataset = FeaturesEncoderDataset(f_train_values, f_train_targets, params['training_len'], forecast_len)
val_dataset = FeaturesEncoderDataset(f_val_values, f_val_targets, params['val_len'], forecast_len)
test_dataset = FeaturesEncoderDataset(f_test_values, f_test_targets, params['val_len'], forecast_len)

# Only the training loader shuffles; it gets the seeded generator and worker seeding.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=8, worker_init_fn=seed_worker, generator=gen)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=8)
# Unshuffled view of the training set, used by the Lightning module when plotting.
train_loader_unshuffled = DataLoader(train_dataset, batch_size, shuffle=False, num_workers=8)

In [None]:
# Pull one training batch and print the shapes of its three tensors as a sanity check.
src, trg_in, trg_out = next(iter(train_loader))
print(src.shape)
print(trg_in.shape)
print(trg_out.shape)

In [None]:
# Root folders for TensorBoard logs and model checkpoints.
log_dir = "ts_stock_encoder_only_n_step_ahead_logs"
model_dir = "ts_stock_encoder_only_n_step_ahead_models"

# `is_available` must be called; the bare function object is always truthy and
# selected "cuda:0" even without a GPU.
device = ("cuda:0" if torch.cuda.is_available() else "cpu")

def train(ticker, version_name='', model_name ='', ckpt_dir='w2v'):
    """Train a fresh ``TransformerTrainer`` on the global train/val loaders.

    Uses the notebook globals ``params``, ``train_loader``, ``val_loader``,
    ``log_dir`` and ``model_dir``.

    Args:
        ticker: symbol being trained; default for both names below.
        version_name: TensorBoard version prefix (defaults to ``ticker``).
        model_name: checkpoint sub-folder name (defaults to ``ticker``).
        ckpt_dir: experiment tag used in the log version and checkpoint path.

    Returns:
        ``(trainer, stock_model)`` after fitting.
    """
    if version_name == '':
        version_name = ticker

    if model_name == '':
        model_name = ticker 

    logger = TensorBoardLogger(
        save_dir=log_dir,
        version=f'{version_name}_{ckpt_dir}'
    )

    # "valid_loss_epoch" is the epoch aggregate of the "valid_loss" metric
    # logged in validation_step.
    checkpoint_callback = ModelCheckpoint(
        monitor="valid_loss_epoch",
        mode="min",
        dirpath=f"{model_dir}/{ckpt_dir}/{model_name}",
        filename="{epoch}-{valid_loss_epoch:.4f}",
        save_last= True,
        save_top_k=2
    )

    early_stopping_callback = EarlyStopping(
        monitor="valid_loss_epoch",
        mode="min",
        patience=100
    )

    trainer = pl.Trainer(
        max_epochs=150,
        # Request a GPU only when one exists; a hard-coded gpus=1 fails on CPU-only hosts.
        gpus=1 if torch.cuda.is_available() else None,
        logger=logger,
        callbacks=[checkpoint_callback, early_stopping_callback],
        log_every_n_steps=7,
        deterministic=True
    )

    stock_model = TransformerTrainer(params)

    trainer.fit(stock_model, train_loader, val_loader)

    return trainer, stock_model

#### Final model

In [None]:
def get_prediction_value(preds):
    """Convert each tensor in ``preds`` to a NumPy array (moved to CPU and detached)."""
    arrays = []
    for x in preds:
        arrays.append(x.cpu().detach().numpy())
    return arrays

def get_ith_pred(preds, i = 0):
    """Return element ``i`` of ``preds`` (the first one by default)."""
    selected = preds[i]
    return selected

def revert_transform(values):
    """Map scaled values back to price scale with the global ``train_px_scaler``; returns 1-D."""
    as_column = values.reshape(-1, 1)
    restored = train_px_scaler.inverse_transform(as_column)
    return restored.reshape(-1)

def forecast(stock_model, trainer, ckpt_path, test_loader):
    """Predict with the checkpoint at ``ckpt_path`` and return the first sample's forecast.

    ``trainer.predict`` output is flattened across batches, the first entry is
    selected, and its elements are converted to a NumPy array.
    """
    with torch.no_grad():
        batched = trainer.predict(model=stock_model, dataloaders=test_loader, ckpt_path=ckpt_path)

    per_sample = flatten(batched)
    first_sample = get_ith_pred(per_sample)

    return np.array(get_prediction_value(first_sample))

def evaluate(pred, y):
    """Compute RMSE, MSE, MAPE and Pearson r between predictions and targets in price scale.

    Both inputs are first mapped back to prices with ``revert_transform``.
    Returns a dict with keys ``rmse``, ``mse``, ``mape`` and ``r``.
    """
    pred_px = revert_transform(pred)
    true_px = revert_transform(y)

    rmse = calculate_rmse(true_px, pred_px)

    return dict(
        rmse=rmse,
        mse=rmse ** 2,
        mape=calculate_mape(true_px, pred_px),
        r=calc_pearson_coeff(pred_px, true_px),
    )
    


In [None]:
def calculate_rmse(y_true, y_pred):
    """Root-mean-square error between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    return np.sqrt(np.mean(residual ** 2))

def calculate_mape(y_true, y_pred): 
    """Mean absolute percentage error, in percent, relative to ``y_true``."""
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error)*100


def calc_pearson_coeff(y_pred, y):
    """Pearson correlation coefficient between two NumPy arrays."""
    pred_centered = y_pred - np.mean(y_pred)
    target_centered = y - np.mean(y)

    covariance = np.sum(pred_centered * target_centered)
    scale = np.sqrt(np.sum(pred_centered ** 2) * np.sum(target_centered ** 2))

    return covariance / scale

In [None]:
# Folder prefix for the per-epoch prediction plots; read as a global by TransformerTrainer.
train_output_img_dir = "train_output_imgs/encoder_only_n_step/w2v/test/"

In [None]:
# Default output folder for evaluation figures (gen_fig takes the folder as an argument).
img_folder = "output_imgs/encoder_only_n_step/w2v/prices_no_sampling/"

def gen_fig(preds, eval_res, targets, img_folder, title, i = 0, savefig=False):
    """Print the evaluation metrics and plot predictions against targets.

    Relies on the notebook globals ``training_len``, ``ticker``,
    ``f_test_targets`` and, when ``savefig`` is true, ``ckpt_path`` and
    ``ckpt_dir``.

    Args:
        preds: scaled predictions (mapped back to prices for plotting).
        eval_res: dict of metric name -> value, printed line by line.
        targets: scaled targets aligned with ``preds``.
        img_folder: output folder; created if missing.
        title: figure title.
        i: offset of the forecast window on the x axis.
        savefig: when true, append the metrics to ``{ticker}_results.txt`` and
            save the figure as a JPG in ``img_folder``.
    """
    os.makedirs(img_folder, exist_ok=True)

    plt.figure(figsize=(8, 6))

    start_idx = training_len + i
    end_idx = training_len + len(preds) + i

    metric_lines = [f"Metric {metric}: {eval_res[metric]}" for metric in eval_res.keys()]
    for line in metric_lines:
        print(line)

    if savefig:
        filename = f"{img_folder}{ticker}_results.txt" 
        # Append mode creates the file when missing; the context manager makes
        # sure the handle is closed (it used to be left open).
        with open(filename, "a") as f:
            f.write(f"{ckpt_path}\n")
            for line in metric_lines:
                f.write(f"{line}\n")

    plt.plot(list(range(start_idx, end_idx)), revert_transform(preds), label="predicted")
    plt.plot(list(range(start_idx, end_idx)), revert_transform(targets), label="target")
    # History shown before the forecast: the first training_len + 1 test targets.
    plt.plot(revert_transform(f_test_targets[:training_len + 1]), label="trailing")
    plt.title(f"{title}")
    plt.legend()
    plt.grid()

    if savefig:
        plt.savefig(f"{img_folder}{ticker}_{ckpt_dir}.jpg", bbox_inches="tight")

    plt.show()


#### Training model on all stock data

In [None]:
# Re-download without BTC-USD. This overwrites `daily_data` / `close_data` from the earlier cell.
shorter_tickers = ["^GSPC", "AAPL", "MSFT", "NKE", "JPM" , "JNJ" ]
# need to download data for btc-usd!

daily_data = download_data(shorter_tickers, "2018-05-02", "2022-05-01")
close_data = daily_data['Adj Close']

In [None]:
# Record the configuration used for the training run below.
print(params)

In [None]:
# Only the second assignment takes effect: the full list is immediately
# replaced, so a single symbol (^GSPC) is trained.
shorter_tickers = ["^GSPC", "AAPL", "NKE", "JPM", "JNJ", "MSFT"]
shorter_tickers = ["^GSPC"]

# Experiment tag used in log, checkpoint and image paths.
ckpt_dir = "w2v_no_sampling_128"
# v is the run/version index; a single run (v = 0) per ticker.
for v in range(0, 1):
    for ticker in shorter_tickers:
        # Prices are read from a local CSV here rather than from the downloaded `close_data`.
        # prices_data = close_data[ticker].values
        prices_data = pd.read_csv(f"data/{ticker}.csv")
        prices_data = prices_data['Adj Close'].values
        
        features = create_features(prices_data)

        train_features, val_features, test_features = split_data(features)
        f_train_values, f_val_values, f_test_values, f_train_targets, f_val_targets, f_test_targets = preprocess_data(train_features, val_features, test_features)
        
        # NOTE(review): hard-coded copies of params['training_len'] / params['forecast_window'].
        training_len = 60
        forecast_len = 30

        batch_size = 64

        gen = torch.Generator()
        gen.manual_seed(1233)

        # Rebinds the module-level datasets/loaders that train() and the Lightning module read as globals.
        train_dataset = FeaturesEncoderDataset(f_train_values, f_train_targets, params['training_len'], forecast_len)
        val_dataset = FeaturesEncoderDataset(f_val_values, f_val_targets, params['val_len'], forecast_len)
        test_dataset = FeaturesEncoderDataset(f_test_values, f_test_targets, params['val_len'], forecast_len)

        train_loader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=8, worker_init_fn=seed_worker, generator=gen)
        val_loader = DataLoader(val_dataset, batch_size, shuffle=False, num_workers=8)
        test_loader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=8)
        train_loader_unshuffled = DataLoader(train_dataset, batch_size, shuffle=False, num_workers=8)
        
        # NOTE(review): `global` has no effect at notebook top level; the assignment below already rebinds the global.
        global train_output_img_dir
        
        img_dir = f"train_output_imgs/encoder_only_n_step/{ckpt_dir}/{ticker}_{v}/"
        train_output_img_dir = img_dir
        print(train_output_img_dir)

        trainer, stock_model = train(ticker, version_name=f"{ticker}_{v}", model_name=f"{ticker}_{v}", ckpt_dir=ckpt_dir)



: 

#### Model evaluation

In [None]:
# Rebuild the test set for the ticker being evaluated.
ticker = "^GSPC"

# NOTE(review): `prices_data` is assigned here but create_features below reads close_data again directly.
prices_data = close_data[ticker].values
# prices_data = pd.read_csv(f"data/{ticker}.csv")
# prices_data = prices_data['Adj Close'].values
features = create_features( prices_data = close_data[ticker].values)
train_features, val_features, test_features = split_data(features)
# Calling preprocess_data again re-fits the global scalers on this ticker's training split.
f_train_values, f_val_values, f_test_values, f_train_targets, f_val_targets, f_test_targets = preprocess_data(train_features, val_features, test_features)
# `forecast_len` and `batch_size` come from earlier cells.
test_dataset = FeaturesEncoderDataset(f_test_values, f_test_targets, params['val_len'], forecast_len)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=8)

In [None]:
# Stand-alone Trainer for evaluation; mirrors the setup inside train() with
# different paths, patience and epoch limit, and without deterministic=True.
logger = TensorBoardLogger(
    save_dir=log_dir,
    ### TODO: change version when rerunning
    version=f'test_res'
)

checkpoint_callback = ModelCheckpoint(
    monitor="valid_loss_epoch",
    mode="min",
    ### TODO: change dir path when rerunning
    dirpath=f"{model_dir}/test_res",
    filename="{epoch}-{valid_loss_epoch:.4f}",
    save_last= True,
    save_top_k=2
)

early_stopping_callback = EarlyStopping(
    monitor="valid_loss_epoch",
    mode="min",
    patience=40
)

# NOTE(review): `metrics` is not used anywhere in this cell.
metrics = {"loss": "ptl/val_loss"}

# NOTE(review): gpus=1 is hard-coded; this presumably fails on a machine without a GPU.
trainer = pl.Trainer(
    max_epochs=120,
    gpus=1,
    logger=logger,
    callbacks=[checkpoint_callback, early_stopping_callback],
    log_every_n_steps=7,
)

# Built with the current `params`; a later cell edits `params` and builds the model again.
stock_model = TransformerTrainer(params)

In [None]:
# Switch the shared config to the Time2Vec variant before loading its checkpoint.
# This mutates `params` in place, so re-running earlier cells afterwards uses these values.
params["use_absolute_enc"] = False
params["use_input_projections"] = False
params["sampling"] = True
params["d_model"] = 128

In [None]:
# Set to True to also write the metrics file and the figure to disk.
savefig=False

# NOTE(review): the tag and the figure title say "no sampling" / "w/o sampling",
# but params["sampling"] is True and the checkpoint path below is under
# "t2v_with_sampling_threshold" - confirm which experiment this cell reports.
ckpt_dir = "t2v_no_sampling_128"

# Rebuild the model so its architecture matches the edited `params`, then load the checkpoint for prediction.
stock_model = TransformerTrainer(params)
ckpt_path = 'ts_stock_encoder_only_n_step_ahead_models/t2v_with_sampling_threshold/^GSPC_1/epoch=47-valid_loss_epoch=3.4808.ckpt'
all_preds = forecast(stock_model, trainer, ckpt_path, test_loader)
print(all_preds.shape)
# Targets of the first test window, matching the first-sample forecast returned by forecast().
targets = f_test_targets[training_len: training_len + forecast_len]
eval_res = evaluate(all_preds, targets)


img_folder = f"output_imgs/encoder_only_n_step/{ckpt_dir}/{ticker}/"
fig_title = f"30-step ahead predictions with encoder-only architecture, w/o sampling for {ticker}"
gen_fig(all_preds, eval_res, targets, img_folder, fig_title, savefig=savefig)