In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

import warnings
import logging
logging.disable(logging.CRITICAL)
from tqdm.autonotebook import tqdm

import torch
from torch.nn import MSELoss, CrossEntropyLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
import pytorch_lightning as pl

import optuna
from optuna.integration import PyTorchLightningPruningCallback

from darts import TimeSeries, concatenate
from darts.models import TFTModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import smape, rmse, mape

from sklearn.model_selection import train_test_split

In [None]:
# Report whether PyTorch's MPS backend is available on this machine.
print(torch.backends.mps.is_available())

In [None]:
# Execute the shared helper notebooks inside this kernel so the names they define are usable below.
# NOTE(review): presumably these provide `minute_frequencies_conventions`, the data-preparation
# helpers (`load_stock_data`, `separate`, `split_data`, ...) and `rmse_loss` / `mape_loss` — confirm.
%run ../utils/preprocessing.ipynb
%run ../utils/losses.ipynb

In [None]:
# Choose the accelerator name ("mps" or "cpu") together with a matching torch device.
# When MPS is unavailable the device falls back to CPU as well, so `mps_device`
# never refers to a backend that cannot be used on this machine.
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    accelerator = "mps"
else:
    print("MPS device not found.")
    mps_device = torch.device("cpu")
    accelerator = "cpu"

In [None]:
# Experiment configuration.
TICKER = 'AMZN'
# Bar size; used as the key into `minute_frequencies_conventions` and in the model name.
FREQ_INT = 15
DATA_FREQUENCY = minute_frequencies_conventions[FREQ_INT]
MODEL_NAME = f'{TICKER}_{FREQ_INT}_TFT'

# Forecast horizon and length of the history window handed to the model per prediction.
OUTPUT_LEN = 1
INPUT_LEN = 70 if FREQ_INT == 15 else 270

# Absolute path (leading slash restored so it no longer resolves relative to the
# current working directory, matching the checkpoint directory used when loading the model).
# NOTE(review): machine-specific location — adjust when running elsewhere.
tft_dir = f'/Users/work/repos/diplomka/tft/saved_models/{MODEL_NAME}'

In [None]:
# Restore the saved TFT model named MODEL_NAME from the per-frequency checkpoint directory.
# NOTE(review): `work_dir` is an absolute, machine-specific path — adjust when running elsewhere.
model = TFTModel.load_from_checkpoint(MODEL_NAME, work_dir=f'/Users/work/repos/diplomka/tft/saved_models/{FREQ_INT}')

### Data load

In [None]:
# Read the resampled bars for TICKER and push them through the preprocessing helpers.
# Each step consumes the dictionary produced by an earlier step (unpacked as keyword arguments).
resampled_csv = f'../data/resampled_data/{DATA_FREQUENCY}/{TICKER}_resampled_{DATA_FREQUENCY}.csv'
stock = load_stock_data(resampled_csv, FREQ_INT)

X_y_df = separate(stock)
splits = split_data(**X_y_df)
ts_splits = transform_splits_to_time_series(**splits)
ts_full = transform_to_time_series(**X_y_df)

# The full series is scaled with the scaler objects returned by the split-scaling step.
scaled_splits_data = scale_splits_data(**ts_splits)
scaled_full_data = scale_full_data(
    ts_full['ts_X_full'],
    ts_full['ts_y_full'],
    scaled_splits_data['scaler_X'],
    scaled_splits_data['scaler_y'],
)

# Bundle every intermediate artefact for this ticker under one name.
stock_full = dict(
    ticker=TICKER,
    stock=stock,
    splits=splits,
    ts_splits=ts_splits,
    ts_full=ts_full,
    scaled_splits_data=scaled_splits_data,
    scaled_full_data=scaled_full_data,
)

### Individual prediction

In [None]:
# Plot the close price together with the SMA_5D and SMA_1D columns of the loaded data.
raw_stock = stock_full['stock']
# The first 40 rows are skipped — presumably the moving-average warm-up; confirm.
plot_data = raw_stock[40:]

plt.figure(100, figsize=(20, 7))
# No `hue` variable is used, so a `palette` argument would have no effect here;
# every line simply takes the next colour of the default cycle.
# Each line gets a label so that the legend lists all three series.
sns.lineplot(x="datetime", y="close", data=plot_data, label=raw_stock['ticker'].unique()[0])
sns.lineplot(x="datetime", y="SMA_5D", data=plot_data, label="SMA_5D")
sns.lineplot(x="datetime", y="SMA_1D", data=plot_data, label="SMA_1D")
plt.legend();

In [None]:
def make_predictions(model, scaled_splits_data, scaled_full_data, input_len, output_len, limit=None):
    """Produce rolling forecasts over the test split, one window at a time.

    For every step the model receives the ``input_len`` points of the scaled
    full target series (and the same slice of the scaled covariates) that end
    right before the forecast start, and is asked for ``output_len`` steps.
    The window then advances by ``output_len``.

    Parameters
    ----------
    model
        Object exposing ``predict(n, series, past_covariates, num_samples, n_jobs, verbose)``.
    scaled_splits_data : mapping
        Must contain 'scaled_y_train', 'scaled_y_val' and 'scaled_y_test';
        only their lengths are used.
    scaled_full_data : mapping
        Must contain 'scaled_y_full' and 'scaled_X_full', both sliceable by
        integer position.
    input_len : int
        Number of history points passed to the model for each forecast.
    output_len : int
        Forecast horizon per call; also the stride between windows.
    limit : int or None
        Maximum number of forecasts to make. ``None`` walks the whole test
        split. At least one forecast is made whenever the test split is not empty.

    Returns
    -------
    The result of ``concatenate`` applied to the individual forecasts.

    Raises
    ------
    ValueError
        If fewer than ``input_len`` points precede the test split, or if no
        forecast could be produced.
    """
    test_len = len(scaled_splits_data['scaled_y_test'])
    # Position in the full series where the test split starts. It does not
    # change between iterations, so it is computed once.
    test_start = len(scaled_splits_data['scaled_y_train']) + len(scaled_splits_data['scaled_y_val'])

    # A negative start index would silently slice from the end of the series.
    if test_start < input_len:
        raise ValueError(
            f"input_len={input_len} exceeds the {test_start} points available before the test split"
        )

    predictions = []
    for count, offset in enumerate(range(0, test_len, output_len), start=1):
        window_end = test_start + offset
        window_start = window_end - input_len

        pred = model.predict(
            n=output_len,
            series=scaled_full_data['scaled_y_full'][window_start:window_end],
            past_covariates=scaled_full_data['scaled_X_full'][window_start:window_end],
            num_samples=1,
            n_jobs=-1,
            verbose=False
        )
        predictions.append(pred)

        if limit is not None and count >= limit:
            break

    if not predictions:
        raise ValueError("no forecasts were produced: the test split is empty")

    return concatenate(predictions)

In [None]:
%%capture
# Forecast the whole test split window by window; the cell magic above captures this cell's output.
individual_pred = make_predictions(model, scaled_splits_data, scaled_full_data, INPUT_LEN, OUTPUT_LEN)

In [None]:
# Undo the target scaling on the forecasts using the target scaler from the split-scaling step.
pred_unscaled = scaled_splits_data['scaler_y'].inverse_transform(individual_pred)

### Create the predictions dataframe for backtesting

In [None]:
def pred_actual(actual, prediction):
    """Combine actual values and forecasts into one frame for evaluation and backtesting.

    Parameters
    ----------
    actual
        Series object exposing ``pd_dataframe()`` with exactly three columns;
        they are stored, in order, as 'mid_close', 'spread' and 'vol'.
    prediction
        Series object exposing ``pd_series()``; stored as 'prediction',
        aligned on the index of ``actual``.

    Returns
    -------
    pandas.DataFrame
        Columns 'mid_close', 'spread', 'vol', 'prediction' and
        'normalized_volume' (min-max scaled 'vol'; all zeros when the volume
        is constant).
    """
    dfY = pd.DataFrame()
    dfY[["mid_close", 'spread', 'vol']] = actual.pd_dataframe()
    dfY["prediction"] = prediction.pd_series()

    min_vol, max_vol = dfY['vol'].min(), dfY['vol'].max()
    vol_range = max_vol - min_vol
    if vol_range > 0:
        dfY['normalized_volume'] = (dfY['vol'] - min_vol) / vol_range
    else:
        # Constant (or missing) volume: min-max scaling would be 0/0 and fill
        # the column with NaN, so use a flat zero instead.
        dfY['normalized_volume'] = 0.0

    return dfY

# Use the close / spread / vol components of the test covariates (taken from `ts_splits`,
# i.e. before the scaling step) as the "actual" side and pair them with the unscaled forecasts.
actual = stock_full['ts_splits']['ts_X_test'][['close', 'spread', 'vol']]
dfY = pred_actual(actual, pred_unscaled)

In [None]:
# Display the combined actual-vs-prediction frame.
dfY

In [None]:
# Forecast error of `prediction` against `mid_close`, computed with the rmse_loss / mape_loss helpers.
rmse_value = rmse_loss(dfY['mid_close'], dfY['prediction'])
print("RMSE: {:.2f} units".format(rmse_value))

mape_value = mape_loss(dfY['mid_close'], dfY['prediction'])
print("MAPE: {:.2f}%".format(mape_value))

### Plot the forecast with a semi-transparent normalized-volume overlay

In [None]:
plt.style.use('ggplot')

def plot_dual_axis(df):
    """Plot actual vs. predicted price (left axis) with normalized volume bars (right axis).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'mid_close', 'prediction' and
        'normalized_volume'; its index is used as the x axis.

    The title is built from the notebook-level FREQ_INT and TICKER settings,
    so it stays correct when the notebook is run for another bar size.
    """
    price_color = 'tab:blue'
    prediction_color = 'tab:green'
    volume_color = 'tab:red'

    fig, ax1 = plt.subplots(figsize=(15, 5))

    # Left axis: price lines.
    ax1.set_xlabel('Time')
    ax1.set_ylabel('Price', color=price_color)
    ax1.plot(df.index, df['mid_close'], color=price_color, alpha=0.8, linewidth=1, label='Actual Price')
    ax1.plot(df.index, df['prediction'], color=prediction_color, linewidth=1, alpha=0.8, label='Predicted Price')
    ax1.tick_params(axis='y', labelcolor=price_color)
    ax1.legend(loc='upper left')
    ax1.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Right axis: faint volume bars so the price lines stay readable.
    ax2 = ax1.twinx()
    ax2.set_ylabel('Volume', color=volume_color)
    ax2.bar(df.index, df['normalized_volume'], alpha=0.2, color=volume_color, label='Normalized Volume')
    ax2.tick_params(axis='y', labelcolor=volume_color)
    ax2.legend(loc='upper right')

    ax1.set_title(f'TFT {FREQ_INT}m - {TICKER}: Predicted vs Actual Price')
    fig.tight_layout()
    plt.show()

# Plot the forecast against the actual price, with normalized volume on the secondary axis.
plot_dual_axis(dfY)

### Backtesting strategy

In [None]:
# Execute the backtesting helper notebook inside this kernel.
# NOTE(review): presumably defines `active_strategy` and `passive_strategy` used below — confirm.
%run ../utils/backtesting.ipynb

In [None]:
# Thresholds handed to the backtesting strategies below.
# NOTE(review): presumably fractional price moves (0.001 = 0.1 %) — confirm against ../utils/backtesting.ipynb.
buy_threshold=0.001
sell_threshold=0.007	
stop_loss_threshold = 0.1

In [None]:
# Backtest the forecast-driven (active) strategy on the prediction frame.
# NOTE(review): the meaning of the six returned objects is inferred from their names only;
# see ../utils/backtesting.ipynb for the actual contract.
active_portfolio_df, active_capital_df, investment_amt_sell_df, investment_amt_buy_df, number_of_trades, total_comission = active_strategy(
    dfY, 
    total_capital=10000, 
    shares_capital=5000, 
    investment_pct=0.002, 
    commission=0.002, 
    buy_threshold=buy_threshold, 
    sell_threshold=sell_threshold,
    stop_loss_threshold=stop_loss_threshold
)

In [None]:
# Backtest the passive benchmark on the same frame, starting with cash only (shares=0).
# n = (bars per hour) * 6.5 * 7 — presumably 6.5 trading hours per session over 7 sessions;
# confirm what `n` controls in ../utils/backtesting.ipynb.
# Note that the expression evaluates to a float (182.0 when FREQ_INT is 15).
passive_portfolio_df, passive_capital_df = passive_strategy(
    dfY,
    capital=10000, 
    shares=0, 
    commission=0.002, 
    n=(60/FREQ_INT)*6.5*7,
    stop_loss_threshold=stop_loss_threshold
)

In [None]:
plt.style.use('ggplot')

color_passive = 'tab:blue'
color_active = 'tab:green'


def _action_index(frame, action):
    """Return the index labels of `frame` whose 'Action' equals `action`, ignoring case.

    Case is ignored because the action labels are spelled with different
    casing in different cells of this notebook.
    """
    hits = frame['Action'].astype(str).str.lower() == action.lower()
    return frame.index[hits.to_numpy()]


buy_indices = _action_index(active_portfolio_df, 'buy')
sell_indices = _action_index(active_portfolio_df, 'sell')
stop_loss_indeces_active = _action_index(active_portfolio_df, 'Stop Loss Sell')

# The passive stop-loss index was previously never assigned (its definition was
# commented out), which raised a NameError on a fresh kernel.
# NOTE(review): it is unclear which passive frame carries the 'Action' column, so the
# portfolio frame is preferred and the capital frame is used as a fallback — confirm.
passive_actions_df = passive_portfolio_df if 'Action' in passive_portfolio_df.columns else passive_capital_df
stop_loss_indeces_pasive = _action_index(passive_actions_df, 'Stop Loss Sell')

fig, ax = plt.subplots(figsize=(18, 5))

ax.plot(active_portfolio_df['Portfolio Value'], color=color_active, linewidth=1, label='Active Portfolio Value')
ax.plot(passive_portfolio_df['Portfolio Value'], color=color_passive, linewidth=1, label='Passive Portfolio Value')

# Mark every trade of the active strategy on its portfolio-value curve.
ax.scatter(buy_indices, active_portfolio_df.loc[buy_indices, 'Portfolio Value'], color='green', label='Buy', marker='o')
ax.scatter(sell_indices, active_portfolio_df.loc[sell_indices, 'Portfolio Value'], color='red', label='Sell', marker='o')

# Stop-loss events: 'x' for the active portfolio, 'o' for the passive one.
ax.scatter(stop_loss_indeces_active, active_portfolio_df.loc[stop_loss_indeces_active, 'Portfolio Value'], color='black', label='Active Portfolio Stop Loss', marker='x')
ax.scatter(stop_loss_indeces_pasive, passive_portfolio_df.loc[stop_loss_indeces_pasive, 'Portfolio Value'], color='black', label='Passive Portfolio Stop Loss', marker='o')

ax.legend(loc='upper left')

# Bar size taken from the configuration instead of a hard-coded "15m".
ax.set_title(f'TFT {FREQ_INT}m - {TICKER} Portfolio Value Over Time')
ax.set_xlabel('Time')
ax.set_ylabel('Portfolio Value')
plt.show()

In [None]:
color_passive = 'tab:blue'
color_active = 'tab:green'


def _stop_loss_index(frame):
    """Return the index labels of `frame` whose 'Action' is 'Stop Loss Sell' (case-insensitive)."""
    is_stop_loss = frame['Action'].astype(str).str.lower() == 'stop loss sell'
    return frame.index[is_stop_loss.to_numpy()]


# Compute the stop-loss positions here so this cell does not depend on variables
# that no earlier cell is guaranteed to have defined.
# NOTE(review): it is unclear which passive frame carries the 'Action' column, so the
# portfolio frame is preferred and the capital frame is used as a fallback — confirm.
passive_actions_df = passive_portfolio_df if 'Action' in passive_portfolio_df.columns else passive_capital_df
stop_loss_indeces_pasive = _stop_loss_index(passive_actions_df)
stop_loss_indeces_active = _stop_loss_index(active_portfolio_df)

fig, ax = plt.subplots(figsize=(18, 5))

# The active cash curve was left disabled by the author:
#ax.plot(active_capital_df, color=color_active, linewidth=1, label='Active Portfolio Disposable Cash')
ax.plot(passive_capital_df['Available Cash'], color=color_passive, linewidth=1, label='Passive Portfolio Disposable Cash')

# Stop-loss events, drawn at the cash level of the respective portfolio.
ax.scatter(stop_loss_indeces_pasive, passive_capital_df.loc[stop_loss_indeces_pasive, 'Available Cash'], color='black', label='Passive Portfolio Stop Loss', marker='x')
ax.scatter(stop_loss_indeces_active, active_capital_df.loc[stop_loss_indeces_active, 'Available Cash'], color='black', label='Active Portfolio Stop Loss', marker='o')

ax.legend(loc='lower left')

ax.set_title('Disposable cash')
ax.set_xlabel('Time')
ax.set_ylabel('Cash')
plt.show()

In [None]:
# Active portfolio: value held in stock vs. cash (left axis) and per-trade sizes (right axis).
color_stocks = 'tab:blue'
color_cash = 'tab:green'

# Value held in stock = total portfolio value minus the available cash.
stocks = active_portfolio_df['Portfolio Value'] - active_capital_df['Available Cash']

fig, ax1 = plt.subplots(figsize=(18, 5))

line1, = ax1.plot(stocks, color=color_stocks, linewidth=1, label='Stock Capital')
line2, = ax1.plot(active_capital_df['Available Cash'], color=color_cash, linewidth=1, label='Available Cash')

# The x label is set on the primary axes: the x axis of a twinx() axes is
# hidden, so a label set after twinx() via plt.xlabel would never be drawn.
ax1.set_xlabel('Time')
ax1.set_ylabel('Stock Capital and Available Cash', color=color_stocks)
ax1.tick_params(axis='y', labelcolor=color_stocks)
ax1.set_title('Stock Capital, Available Cash, Buy and Sell Trade Size')

# Secondary y axis for the trade sizes, which live on a different scale.
ax2 = ax1.twinx()
line3, = ax2.plot(investment_amt_buy_df, color=color_cash, linestyle='-', linewidth=1, label='Buy Trade Size')
line4, = ax2.plot(investment_amt_sell_df, color=color_stocks, linestyle='-', linewidth=1, label='Sell Trade Size')

ax2.set_ylabel('Trade Size', color=color_cash)
ax2.tick_params(axis='y', labelcolor=color_cash)

# Both legends are placed on the twin axes, which is drawn on top, so the
# plotted lines do not cover them. An Axes keeps only the most recent
# legend() call, hence the first legend is re-attached with add_artist.
legend1 = ax2.legend(handles=[line2, line1], loc='upper left')
ax2.add_artist(legend1)
legend2 = ax2.legend(handles=[line3, line4], loc='upper right')

plt.show()


In [None]:
# Share of the active portfolio held in stock vs. cash over time.
# Relies on `stocks` computed in the previous cell.
color_stocks = 'tab:blue'
color_cash = 'tab:green'

available_cash = active_capital_df['Available Cash']

# Express both components as fractions of their sum at every step.
total_portfolio = stocks + available_cash
normalized_stocks = stocks / total_portfolio
normalized_cash = available_cash / total_portfolio

fig, ax = plt.subplots(figsize=(20, 6))
time_axis = active_capital_df.index

ax.stackplot(
    time_axis,
    normalized_stocks,
    normalized_cash,
    labels=['Normalized Stock Capital', 'Normalized Available Cash'],
    colors=[color_stocks, color_cash],
)

# Translucent overlays on top of the two stacked regions.
ax.fill_between(time_axis, normalized_stocks, alpha=0.3, color=color_stocks)
ax.fill_between(time_axis, normalized_stocks, 1, alpha=0.3, color=color_cash)

ax.set_ylabel('Normalized Portfolio Components')
ax.set_title('Stack Plot of Normalized Stock Capital and Available Cash')
ax.set_xlabel('Time')
ax.legend(loc='upper left')

plt.show()


### Active portfolio trades

In [None]:
# Keep only the rows on which a buy or sell was executed. Matching ignores case because
# the action labels are spelled with different casing in different cells of this notebook.
is_trade = active_portfolio_df['Action'].astype(str).str.lower().isin(['buy', 'sell'])
# .copy() so that the new column is added to an independent frame rather than to a
# slice of active_portfolio_df (avoids pandas' SettingWithCopyWarning).
valid_trades_df = active_portfolio_df[is_trade].copy()

# Change in portfolio value since the previous trade. The first trade has no
# predecessor, so its value is NaN and it is left out of the statistics below.
valid_trades_df['Trade Profit'] = valid_trades_df['Portfolio Value'].diff()
trade_profits = valid_trades_df['Trade Profit'].dropna()

# Average profit per trade
average_profit_per_trade = trade_profits.mean()

# Share of trades with a positive profit, taken over the trades whose profit is
# defined (previously the undefined first trade was counted in the denominator).
positive_trades_percentage = (trade_profits > 0).mean() * 100

print(f"Average Profit per Trade: {average_profit_per_trade:.2f} USD")
print(f"Percentage of Positive Trades: {positive_trades_percentage:.2f}%")

### Total profit/loss: active vs. passive portfolio

In [None]:
def _profit_summary(portfolio_values):
    """Return (absolute change, relative change in %) between the first and the last value."""
    first_value = portfolio_values.iloc[0]
    last_value = portfolio_values.iloc[-1]
    return last_value - first_value, (last_value / first_value - 1) * 100


active_profit_abs, active_profit_rel = _profit_summary(active_portfolio_df['Portfolio Value'])
pasive_profit_abs, pasive_profit_rel = _profit_summary(passive_portfolio_df['Portfolio Value'])

# Output labels corrected from the misspelled "portfolit".
print(f"Active portfolio absolute profit: {active_profit_abs:.2f} USD")
print(f"Active portfolio relative profit: {active_profit_rel:.2f}%")
print()
print(f"Passive portfolio absolute profit: {pasive_profit_abs:.2f} USD")
print(f"Passive portfolio relative profit: {pasive_profit_rel:.2f}%")

### Average active vs passive portfolio value difference

In [None]:
# Active minus passive portfolio value at every step, and its averages over time.
active_values = active_portfolio_df['Portfolio Value']
passive_values = passive_portfolio_df['Portfolio Value']

poftfolio_diff = active_values - passive_values
average_abs_difference = poftfolio_diff.mean()
# Mean of the per-step active/passive ratio, expressed as a percentage above or below parity.
average_rel_difference = ((active_values / passive_values).mean() - 1) * 100

In [None]:
# Report the time-averaged gap between the active and the passive portfolio.
print(f'Average portfolio absolute difference in time: {average_abs_difference:.2f} USD')
print(f'Average portfolio relative difference in time: {average_rel_difference:.2f}%')

In [None]:
# Gap between the two portfolios at the last step of the backtest.
final_active_value = active_portfolio_df['Portfolio Value'].iloc[-1]
final_passive_value = passive_portfolio_df['Portfolio Value'].iloc[-1]

final_abs_difference = final_active_value - final_passive_value
final_rel_difference = (final_active_value / final_passive_value - 1) * 100

In [None]:
# Report the gap between the active and the passive portfolio at the end of the backtest.
print(f'Final portfolio absolute difference: {final_abs_difference:.2f} USD')
print(f'Final portfolio relative difference: {final_rel_difference:.2f}%')