In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

import warnings
import logging
logging.disable(logging.CRITICAL)
from tqdm.autonotebook import tqdm

import torch
from torch.nn import MSELoss, CrossEntropyLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
import pytorch_lightning as pl

import optuna
from optuna.integration import PyTorchLightningPruningCallback

from darts import TimeSeries, concatenate
from darts.models import NHiTSModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import smape, rmse

from sklearn.model_selection import train_test_split

In [None]:
# Sanity check: show whether this PyTorch build/machine can use the MPS backend.
print(torch.backends.mps.is_available())

In [None]:
# Run the shared helper notebooks before anything else in this notebook.
# NOTE(review): later cells use names that are neither imported nor defined
# here (minute_frequencies_conventions, load_stock_data, separate, split_data,
# transform_splits_to_time_series, transform_to_time_series, scale_splits_data,
# scale_full_data) - presumably they are provided by these notebooks; confirm.
%run ../utils/preprocessing.ipynb
%run ../utils/losses.ipynb

In [None]:
# Pick the accelerator once and derive the torch device from it, so that the
# two module-level names can never disagree.
if torch.backends.mps.is_available():
    accelerator = "mps"
else:
    print("MPS device not found.")
    accelerator = "cpu"

# The name `mps_device` is kept for backward compatibility with any cell that
# already refers to it; it now falls back to the CPU device when MPS is
# unavailable instead of pointing at a backend that cannot be used.
mps_device = torch.device(accelerator)

In [None]:
def make_predictions(model, scaled_splits_data, scaled_full_data, input_len, output_len, limit=None):
    """Roll ``model`` across the test span and stitch the forecasts together.

    Starting at the first test step, the model is given the ``input_len``
    points of the full scaled target (and the same positions of the full past
    covariates) that immediately precede the forecast origin, and is asked for
    ``output_len`` steps. The origin then advances by ``output_len`` until the
    test span is covered or ``limit`` calls have been made.

    The index arithmetic assumes the full series is laid out as train, then
    validation, then test, so that ``len(train) + len(val)`` is the position of
    the first test step inside the full series.

    Args:
        model: Object exposing
            ``predict(n=, series=, past_covariates=, n_jobs=, verbose=)``.
        scaled_splits_data: Mapping with ``'scaled_y_train'``,
            ``'scaled_y_val'`` and ``'scaled_y_test'``; only their lengths are
            used here.
        scaled_full_data: Mapping with ``'scaled_y_full'`` and
            ``'scaled_X_full'``; both are sliced by integer position.
        input_len: Number of history points passed to every ``predict`` call.
        output_len: Number of steps requested per call, and the stride between
            consecutive forecast origins.
        limit: Optional cap on the number of ``predict`` calls. ``None`` means
            the whole test span is covered. The cap is checked after each
            call, so at least one call is always made.

    Returns:
        Whatever ``concatenate`` returns for the list of per-window predictions.

    Raises:
        ValueError: If ``output_len`` is not positive, if train + validation is
            shorter than ``input_len`` (the window would start before the
            beginning of the series), or if the test split is empty.
    """
    if output_len < 1:
        raise ValueError(f"output_len must be a positive integer, got {output_len}")

    test_len = len(scaled_splits_data['scaled_y_test'])
    # Position of the first test step inside the full series (loop-invariant).
    history_len = len(scaled_splits_data['scaled_y_train']) + len(scaled_splits_data['scaled_y_val'])

    if history_len < input_len:
        # A negative slice start would silently select the wrong window.
        raise ValueError(
            f"input_len={input_len} exceeds the {history_len} points available "
            "before the test span"
        )
    if test_len == 0:
        raise ValueError("test split is empty; there is nothing to predict")

    target_full = scaled_full_data['scaled_y_full']
    covariates_full = scaled_full_data['scaled_X_full']

    predictions = []
    for offset in range(0, test_len, output_len):
        # The input_len points that end right before the current forecast origin.
        window = slice(history_len - input_len + offset, history_len + offset)

        predictions.append(
            model.predict(
                n=output_len,
                series=target_full[window],
                past_covariates=covariates_full[window],
                n_jobs=-1,
                verbose=False,
            )
        )

        if limit is not None and len(predictions) >= limit:
            break

    return concatenate(predictions)

In [None]:
def pred_actual(actual, prediction):
    """Combine observed values and a forecast into one backtesting frame.

    Args:
        actual: Series handed to ``TimeSeries.pd_dataframe``; its columns are
            stored, in order, as ``mid_close``, ``spread`` and ``vol``.
        prediction: Series handed to ``TimeSeries.pd_series``; stored as the
            ``prediction`` column.

    Returns:
        A DataFrame with columns ``mid_close``, ``spread``, ``vol``,
        ``prediction`` and ``normalized_volume``. The last one is ``vol``
        min-max scaled over the frame; when every volume is identical the
        denominator of that scaling is zero.
    """
    observed = TimeSeries.pd_dataframe(actual)

    frame = pd.DataFrame()
    frame[["mid_close", 'spread', 'vol']] = observed
    frame["prediction"] = TimeSeries.pd_series(prediction)

    volume = frame['vol']
    lowest = volume.min()
    highest = volume.max()
    frame['normalized_volume'] = (volume - lowest) / (highest - lowest)

    return frame

In [None]:
# Symbols and bar frequencies swept by the backtesting loop.
TICKERS = [
    'QCOM', 'NVDA', 'AMZN', 'MSFT', 'GOOG',
    'TSLA', 'AMD', 'INTC', 'NFLX', 'BAC',
    'WFC', 'GS', 'MA', 'SQ', 'PYPL',
]
FREQUENCIES = [15, 5]

# For a quick single-model run, temporarily narrow the sweep:
#   TICKERS = ['QCOM']
#   FREQUENCIES = [15]

In [None]:
%%capture
for frequency in FREQUENCIES:
    for ticker in TICKERS:

        DATA_FREQUENCY = minute_frequencies_conventions[frequency]
        MODEL_NAME = f'{ticker}_{frequency}_TFT'
        
        OUTPUT_LEN = 1
        INPUT_LEN = 70 if frequency == 15 else 270

        
        model = NHiTSModel.load_from_checkpoint(MODEL_NAME, work_dir=f'/Users/work/repos/diplomka/tft/saved_models/{frequency}/')

        stock = load_stock_data(f'../data/resampled_data/{DATA_FREQUENCY}/{ticker}_resampled_{DATA_FREQUENCY}.csv', frequency)
        
        X_y_df = separate(stock)
        
        splits = split_data(**X_y_df)
        
        ts_splits = transform_splits_to_time_series(**splits)
        
        ts_full = transform_to_time_series(**X_y_df)
        
        scaled_splits_data = scale_splits_data(**ts_splits)
        
        scaled_full_data = scale_full_data(ts_full['ts_X_full'], ts_full['ts_y_full'], scaled_splits_data['scaler_X'], scaled_splits_data['scaler_y'])
        
        stock_full = {
            "ticker": ticker,
            "stock": stock,
            "splits": splits,
            "ts_splits": ts_splits,
            "ts_full": ts_full,
            "scaled_splits_data": scaled_splits_data,
            "scaled_full_data": scaled_full_data
        }


        individual_pred = make_predictions(model, scaled_splits_data, scaled_full_data, INPUT_LEN, OUTPUT_LEN);
        pred_unscaled = scaled_splits_data['scaler_y'].inverse_transform(individual_pred)

        actual = stock_full['ts_splits']['ts_X_test'][['close', 'spread', 'vol']]
        dfY = pred_actual(actual, pred_unscaled)

        dfY.to_csv(f'saved_models/{frequency}/{MODEL_NAME}/backtesting_dataftame.csv')