In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

import warnings
import logging
logging.disable(logging.CRITICAL)
from tqdm.autonotebook import tqdm

import torch
from torch.nn import MSELoss, CrossEntropyLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
import pytorch_lightning as pl

import optuna
from optuna.integration import PyTorchLightningPruningCallback

from darts import TimeSeries, concatenate
from darts.models import NHiTSModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import smape, rmse

from sklearn.model_selection import train_test_split

In [None]:
# Report whether this PyTorch build can use the Apple Metal (MPS) backend.
print(torch.backends.mps.is_available())

In [None]:
# Execute the shared helper notebooks.
# NOTE(review): names used below but not defined in this notebook (e.g. load_stock_data,
# minute_frequencies_conventions, rmse_loss) presumably come from these two - confirm.
%run ../utils/preprocessing.ipynb
%run ../utils/losses.ipynb

In [None]:
# The MPS device handle is always created; only the accelerator name depends on
# whether the backend is actually available on this machine.
mps_device = torch.device("mps")

accelerator = "mps" if torch.backends.mps.is_available() else "cpu"
if accelerator == "cpu":
    print("MPS device not found.")

In [None]:
# --- Experiment configuration -------------------------------------------------
TICKER = 'QCOM'
FREQ_INT = 5  # key into minute_frequencies_conventions (bar size; presumably minutes)
DATA_FREQUENCY = minute_frequencies_conventions[FREQ_INT]
# The saved-model name uses a different layout for the 15 bar size than for the others.
MODEL_NAME = f'NHiTS_{TICKER}_{FREQ_INT}' if FREQ_INT == 15 else f'{TICKER}_{FREQ_INT}_NHiTS'

OUTPUT_LEN = 1   # steps forecast per predict() call
INPUT_LEN = 40   # history steps fed to the model (was the same value in both branches)

# Absolute path (leading slash restored so it matches the work_dir used when loading the model).
nhits_dir = f'/Users/work/repos/diplomka/nhits/saved_models/{MODEL_NAME}'


In [None]:
# Restore the N-HiTS checkpoint named MODEL_NAME from the per-frequency work directory.
model = NHiTSModel.load_from_checkpoint(
    MODEL_NAME,
    work_dir=f'/Users/work/repos/diplomka/nhits/saved_models/{FREQ_INT}/',
)

## Data Load

In [None]:
# Preprocessing chain: each helper consumes the output of the previous one.
stock = load_stock_data(
    f'../data/resampled_data/{DATA_FREQUENCY}/{TICKER}_resampled_{DATA_FREQUENCY}.csv',
    FREQ_INT,
)
X_y_df = separate(stock)
splits = split_data(**X_y_df)
ts_splits = transform_splits_to_time_series(**splits)
ts_full = transform_to_time_series(**X_y_df)

# The scalers returned with the split data are reused to scale the full series.
scaled_splits_data = scale_splits_data(**ts_splits)
scaled_full_data = scale_full_data(
    ts_full['ts_X_full'],
    ts_full['ts_y_full'],
    scaled_splits_data['scaler_X'],
    scaled_splits_data['scaler_y'],
)

# Bundle every intermediate artefact for this ticker in one place.
stock_full = dict(
    ticker=TICKER,
    stock=stock,
    splits=splits,
    ts_splits=ts_splits,
    ts_full=ts_full,
    scaled_splits_data=scaled_splits_data,
    scaled_full_data=scaled_full_data,
)


### Individual prediction

In [None]:
def make_predictions(model, scaled_splits_data, scaled_full_data, input_len, output_len, limit=None):
    """Forecast the test period in consecutive windows of ``output_len`` steps.

    For every window the model receives only the ``input_len`` observations (target
    and past covariates, sliced positionally from the full scaled series) that
    immediately precede that window.

    Args:
        model: Object exposing ``predict(n, series, past_covariates, n_jobs, verbose)``.
        scaled_splits_data: Mapping with ``'scaled_y_train'``, ``'scaled_y_val'`` and
            ``'scaled_y_test'``; only their lengths are used here.
        scaled_full_data: Mapping with ``'scaled_y_full'`` and ``'scaled_X_full'``.
        input_len: Number of history steps passed to each ``predict`` call.
        output_len: Steps forecast per call; also the stride between windows.
        limit: Optional cap on the number of ``predict`` calls. The check runs after
            each call, so at least one forecast is made when the test split is non-empty.

    Returns:
        The result of ``concatenate`` applied to the per-window forecasts.

    Raises:
        ValueError: If ``input_len`` is larger than the number of observations that
            precede the test split (the window start would become negative and the
            slice would silently select the wrong data).
    """
    # Position of the first test observation inside the full series (loop-invariant).
    test_start = len(scaled_splits_data['scaled_y_train']) + len(scaled_splits_data['scaled_y_val'])
    if input_len > test_start:
        raise ValueError(
            f"input_len ({input_len}) exceeds the {test_start} observations available before the test split"
        )

    y_full = scaled_full_data['scaled_y_full']
    x_full = scaled_full_data['scaled_X_full']

    predictions = []
    for offset in range(0, len(scaled_splits_data['scaled_y_test']), output_len):
        # History window ending right before the step(s) being forecast.
        window = slice(test_start - input_len + offset, test_start + offset)

        predictions.append(
            model.predict(
                n=output_len,
                series=y_full[window],
                past_covariates=x_full[window],
                n_jobs=-1,
                verbose=False,
            )
        )

        if limit is not None and len(predictions) >= limit:
            break

    return concatenate(predictions)

In [None]:
%%capture
# Forecast the whole test split window by window; %%capture hides this cell's output.
individual_pred = make_predictions(model, scaled_splits_data, scaled_full_data, INPUT_LEN, OUTPUT_LEN);

In [None]:
# Undo the scaling of the forecasts with scaler_y (the scaler returned alongside the split data).
pred_unscaled = scaled_splits_data['scaler_y'].inverse_transform(individual_pred)

### Create the predictions dataframe for backtesting

In [None]:
def pred_actual(actual, prediction):
    """Combine actual components and the forecast into one backtesting frame.

    Args:
        actual: Series object exposing ``pd_dataframe()`` that yields exactly three
            columns; they are renamed positionally to ``mid_close``, ``spread``, ``vol``.
        prediction: Series object exposing ``pd_series()``; stored as ``prediction``
            (aligned on the index of ``actual``).

    Returns:
        DataFrame with columns ``mid_close``, ``spread``, ``vol``, ``prediction`` and
        ``normalized_volume`` (min-max scaled ``vol``). When every volume is identical
        the normalized column is 0.0 instead of NaN from a 0/0 division.
    """
    frame = actual.pd_dataframe().copy()
    frame.columns = ["mid_close", "spread", "vol"]
    frame["prediction"] = prediction.pd_series()

    min_vol, max_vol = frame["vol"].min(), frame["vol"].max()
    vol_range = max_vol - min_vol
    if vol_range == 0:
        # Constant volume: min-max scaling is undefined, fall back to a flat zero column.
        frame["normalized_volume"] = 0.0
    else:
        frame["normalized_volume"] = (frame["vol"] - min_vol) / vol_range

    return frame

# Take the close / spread / vol components of the test covariates from ts_splits
# (the series before scale_splits_data is applied) as the "actual" side of the frame.
actual = stock_full['ts_splits']['ts_X_test'][['close', 'spread', 'vol']]
dfY = pred_actual(actual, pred_unscaled)

In [None]:
# Display the backtesting frame (actual components, prediction, normalized volume).
dfY

In [None]:
# Report rmse_loss between the actual close and the forecast, formatted to two decimals.
print(f"RMSE: {rmse_loss(dfY['mid_close'], dfY['prediction']):.2f} units")
#print("SMAPE: {:.2f}%".format(smape_loss(dfY['mid_close'], dfY['prediction'])))

### Plot the forecast with a semi-transparent volume overlay

In [None]:
plt.style.use('ggplot')


def plot_dual_axis(df):
    """Plot actual vs. predicted price with normalized volume bars on a twin axis.

    Args:
        df: Frame providing the ``mid_close``, ``prediction`` and
            ``normalized_volume`` columns; its index is used for the x axis.

    The figure is shown immediately; nothing is returned.
    """
    price_color = 'tab:blue'
    prediction_color = 'tab:green'
    volume_color = 'tab:red'

    fig, price_ax = plt.subplots(figsize=(40, 7))

    # Left axis: the two price lines.
    price_ax.plot(df.index, df['mid_close'], color=price_color, alpha=0.8, linewidth=1, label='Actual Price')
    price_ax.plot(df.index, df['prediction'], color=prediction_color, linewidth=1, alpha=0.8, label='Predicted Price')
    price_ax.set_xlabel('Time')
    price_ax.set_ylabel('Price', color=price_color)
    price_ax.tick_params(axis='y', labelcolor=price_color)
    price_ax.legend(loc='upper left')

    # Right axis: faint volume bars sharing the same x axis.
    volume_ax = price_ax.twinx()
    volume_ax.bar(df.index, df['normalized_volume'], alpha=0.2, color=volume_color, label='Volume')
    volume_ax.set_ylabel('Volume', color=volume_color)
    volume_ax.tick_params(axis='y', labelcolor=volume_color)
    volume_ax.legend(loc='upper right')

    price_ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    fig.tight_layout()
    plt.show()


# Render the chart for the backtesting frame built from the test split.
plot_dual_axis(dfY)

### Backtesting strategy

In [None]:
# Execute the backtesting helper notebook.
# NOTE(review): active_strategy and passive_strategy used below are presumably defined there - confirm.
%run ../utils/backtesting.ipynb

In [None]:
# Thresholds forwarded to active_strategy as buy_threshold / sell_threshold.
# NOTE(review): "throshold" is a misspelling of "threshold"; the names are notebook
# globals, so they are left unchanged here.
buy_throshold=0
sell_throshold=0


# NOTE(review): commission is 0 here but 0.001 in the passive_strategy call further
# down - confirm that the asymmetric cost assumption is intended for the comparison.
active_portfolio_df, active_capital_df = active_strategy(
    dfY, 
    capital=10000, 
    shares_capital=5000, 
    investment=10, 
    commission=0, 
    buy_threshold=buy_throshold, 
    sell_threshold=sell_throshold
)

In [None]:
# Passive benchmark run on the same frame and starting capital as the active strategy.
# NOTE(review): n evaluates to a float; it looks like (bars per hour) * 6.5 hours * 7 days,
# i.e. a period measured in bars - confirm the meaning and type passive_strategy expects.
passive_portfolio_df, passivecapital_df = passive_strategy(
    dfY,
    capital=10000, 
    shares=0, 
    commission=0.001, 
    n=(60/FREQ_INT)*6.5*7
)

### Plot the portfolio value from backtesting

In [None]:
# Line colours shared by the portfolio-value and cash charts.
color_active = 'tab:blue'
color_passive = 'tab:green'

plt.figure(figsize=(20, 6))

# Static decorations first, then the two series.
plt.title(f'{TICKER}: Portfolio Value Over Time')
plt.xlabel('Time')
plt.ylabel('Portfolio Value')

plt.plot(
    active_portfolio_df['Portfolio Value'],
    color=color_active,
    linewidth=1,
    label='Active Portfolio Value',
)
plt.plot(
    passive_portfolio_df['Portfolio Value'],
    color=color_passive,
    linewidth=1,
    label='Passive Portfolio Value',
)
plt.legend(loc='upper left')

plt.show()

In [None]:
# Cash held by each strategy over time.
# color_active / color_passive are the colours defined in the portfolio-value plot cell,
# so both charts use the same colour per strategy.
plt.figure(figsize=(20, 6))

# Label both lines so the chart can be read on its own.
plt.plot(active_capital_df, color=color_active, linewidth=1, label='Active Cash')
plt.plot(passivecapital_df, color=color_passive, linewidth=1, label='Passive Cash')
plt.legend(loc='upper left')

plt.title('Disposable cash')
plt.xlabel('Time')
plt.ylabel('Cash')
plt.show()

### Average active vs passive portfolio value difference

In [None]:
# Per-timestamp gap between the active and passive portfolio values.
poftfolio_diff = active_portfolio_df['Portfolio Value'] - passive_portfolio_df['Portfolio Value']

# Mean gap in currency units, and mean active/passive ratio expressed as a percentage change.
average_abs_difference = poftfolio_diff.mean()
average_rel_difference = (
    (active_portfolio_df['Portfolio Value'] / passive_portfolio_df['Portfolio Value']).mean() - 1
) * 100

In [None]:
# Print the time-averaged active-minus-passive gap, in USD and in percent.
print(f'Average portfolio absolute difference in time: {average_abs_difference:.2f} USD')
print(f'Average portfolio relative difference in time: {average_rel_difference:.2f}%')

In [None]:
# Same comparison restricted to the last row of each portfolio frame:
# absolute gap, and active/passive ratio expressed as a percentage change.
final_abs_difference = (active_portfolio_df['Portfolio Value'].iloc[-1] - passive_portfolio_df['Portfolio Value'].iloc[-1])
final_rel_difference = ((active_portfolio_df['Portfolio Value'].iloc[-1] / passive_portfolio_df['Portfolio Value'].iloc[-1])-1)*100

In [None]:
# Print the end-of-period active-minus-passive gap, in USD and in percent.
print(f'Final portfolio absolute difference: {final_abs_difference:.2f} USD')
print(f'Final portfolio relative difference: {final_rel_difference:.2f}%')