In [1]:
# Load (or reload) the TensorBoard notebook extension and start a TensorBoard
# instance that reads ./darts_logs and serves on localhost:8099.
%reload_ext tensorboard
%tensorboard --logdir ./darts_logs --host localhost --port 8099

In [2]:
from helper_funcs.data import cleaned_market
from helper_funcs.preprocessing import timeseries_init, get_covariates
from helper_funcs.prediction import historical_predictions, display_prediction_part
from helper_funcs.error import error_print
from helper_funcs.inverse import inverse_func

from models import nlinear, tft, dlinear

from darts import TimeSeries
# from darts.timeseries import concatenate
from darts.models import NaiveSeasonal
from darts.utils.model_selection import train_test_split
from darts.dataprocessing.transformers import StaticCovariatesTransformer, Scaler

from pytorch_lightning.callbacks import ModelCheckpoint, RichProgressBar

from sklearn.preprocessing import StandardScaler

import torchmetrics
import torch

import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Experiment-wide settings used by the cells below.

# Passed as the third argument to every model builder (presumably the random seed).
RANDOM = 101
# Passed to the train/validation split as input_size and to every model builder.
INPUT_CHUNK = 25
# Passed to the train/validation split as horizon and to every model builder.
OUTPUT_CHUNK = 5
# RETRAIN and LAST are forwarded positionally to historical_predictions.
RETRAIN=True
LAST=False
# NOTE(review): RESET and EXP_MA are not referenced by any visible cell.
RESET=False
EXP_MA = 14

# NOTE(review): `checkpoint` is created here but is not part of the `callbacks`
# list defined in the next cell, so it is unused in the visible notebook.
checkpoint = ModelCheckpoint(monitor="val_loss")
progress_bar = RichProgressBar()

In [4]:
# Callback list handed to every model builder; only the progress bar is registered.
callbacks=[progress_bar]

In [5]:
# Earlier configuration without the 'sentiment' column, kept for reference:
# timeseries = timeseries_init(
#     time_col='Date',
#     static_cols=[],
#     value_cols=[
#         'Adj Close',
#         'Close',
#         'High',
#         'Low',
#         'Open',
#         'Volume'
#     ],
#     freq='B', # business days
#     fill_missing=True,
#     group_col='Ticker',
#     type='MULTI'
# )

# Active configuration: price/volume columns plus 'sentiment', grouped by ticker.
# NOTE(review): the frame displayed further down also has a 'holidays' column that
# is not listed in value_cols, so timeseries_init presumably adds it -- confirm.
timeseries = timeseries_init(
    time_col='Date',
    static_cols=[],
    value_cols=[
        'Adj Close', 'Close', 'High', 'Low',
        'Open', 'Volume', 'sentiment'],
    freq='B', # business days
    fill_missing=True,
    group_col='Ticker',
    type='Sentiment',
    
)

# Fit and apply darts' StaticCovariatesTransformer on the result; `timeseries`
# is rebound to the transformed output.
timeseries = StaticCovariatesTransformer().fit_transform(timeseries)

In [6]:
# Split into train and validation parts with a 20% test size.
# The 'model-aware' vertical split is given the model's input window and forecast
# horizon (INPUT_CHUNK / OUTPUT_CHUNK); see the darts train_test_split docs for how
# these affect the split point.
train, val = train_test_split(
    timeseries,
    axis=1,
    test_size=0.2,
    input_size=INPUT_CHUNK,
    horizon=OUTPUT_CHUNK,
    vertical_split_type='model-aware'
)

In [7]:
# Sanity check: lengths of the first train and the first validation series.
len(train[0]), len(val[0])

(1040, 288)

In [8]:
# Variant WITHOUT sentiment: 'Close' is the target, the remaining price/volume
# columns are past covariates and 'holidays' is the future covariate.
# The future covariates returned here are reused by the sentiment variant as well.
target_train, past_train, future_train,target_val, past_val, future_val = get_covariates(
        type='MULTI',
        data={"train": train, "val": val},
        target_col=['Close'],
        past_cov=['High', 'Low', 'Open', 'Volume'],
        future_cov=['holidays'])

In [9]:
# Variant WITH sentiment: same target, but 'sentiment' is added to the past covariates.
# The future covariates are discarded (`_`) because future_train / future_val from
# the previous cell are used instead.
target_train_sent, past_train_sent, _, target_val_sent, past_val_sent, _ = get_covariates(
        type='MULTI',
        data={"train": train, "val": val},
        target_col=['Close'],
        past_cov=['High', 'Low', 'Open', 'Volume', 'sentiment'],
        future_cov=['holidays'])

In [10]:
# target_train_sent, past_train_sent, _, target_val_sent, past_val_sent, _ = get_covariates(
#         type='MULTI',
#         data={"train": train, "val": val},
#         target_col=['Close'],
#         past_cov=['sentiment'],
#         future_cov=['holidays'])

## Raw sentiment data

In [11]:
# Pick two individual series for inspection.
# NOTE(review): indices 0 and 3 assume the ticker order produced by timeseries_init -- confirm.
# `msft` is not used by any visible cell.
apple = timeseries[0]
msft = timeseries[3]

In [12]:
# Show the selected series as a DataFrame (all components, including 'sentiment' and 'holidays').
apple.pd_dataframe()

component,Adj Close,Close,High,Low,Open,Volume,sentiment,holidays
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-02,24.603209,27.332500,27.860001,26.837500,27.847500,212818400.0,0.451456,0.0
2015-01-05,23.910091,26.562500,27.162500,26.352501,27.072500,257142000.0,0.444978,0.0
2015-01-06,23.912342,26.565001,26.857500,26.157499,26.635000,263188400.0,0.416930,0.0
2015-01-07,24.247650,26.937500,27.049999,26.674999,26.799999,160423600.0,0.394427,0.0
2015-01-08,25.179296,27.972500,28.037500,27.174999,27.307501,237458000.0,0.367257,0.0
...,...,...,...,...,...,...,...,...
2019-12-25,69.623230,71.067497,71.222504,70.730003,71.172501,48478800.0,0.377232,1.0
2019-12-26,71.004585,72.477501,72.495003,71.175003,71.205002,93121200.0,0.375746,0.0
2019-12-27,70.977631,72.449997,73.492500,72.029999,72.779999,146266000.0,0.402397,0.0
2019-12-30,71.398888,72.879997,73.172501,71.305000,72.364998,144114400.0,0.398724,0.0


# Scaling

In [None]:
# One scaler per (variant, role). Each scaler is fitted on the training part only
# and then applied to the validation part.
# NOTE(review): these wrap sklearn's StandardScaler, while the model names
# ("nlinear_minmax*"), the section headings and the closing remark talk about
# MinMax scaling -- confirm which scaling is actually intended.
scaler_target = Scaler(StandardScaler())
scaler_past = Scaler(StandardScaler())

scaler_target_sent = Scaler(StandardScaler())
scaler_past_sent = Scaler(StandardScaler())

# without sentiment
target_train_scaled = scaler_target.fit_transform(target_train)
target_val_scaled = scaler_target.transform(target_val)

past_train_scaled = scaler_past.fit_transform(past_train)
past_val_scaled = scaler_past.transform(past_val)

# with sentiment
target_train_sent_scaled = scaler_target_sent.fit_transform(target_train_sent)
target_val_sent_scaled = scaler_target_sent.transform(target_val_sent)

past_train_sent_scaled = scaler_past_sent.fit_transform(past_train_sent)
past_val_sent_scaled = scaler_past_sent.transform(past_val_sent)

# Models

In [None]:
# Naive seasonal baseline with a period of 5 steps, fitted on the unscaled target
# of the first series only (index 0).
model_baseline = NaiveSeasonal(K=5)
model_baseline.fit(target_train[0])

In [None]:
# nlinear_default builder on the unscaled target series, no covariates.
model_nlinear_default = nlinear.nlinear_default(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train,
    target_val,
)

In [None]:
# nlinear_minmax builder on the scaled target series, no covariates.
model_nlinear_minmax = nlinear.nlinear_minmax(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_scaled,
    target_val_scaled,
)

In [None]:
# nlinear_minmax_cov builder: scaled target plus scaled price covariates (no sentiment)
# and the holiday future covariates.
model_nlinear_minmax_cov = nlinear.nlinear_minmax_cov(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_scaled,
    target_val_scaled,
    past_train_scaled,
    past_val_scaled,
    future_train,
    future_val,
)

In [None]:
# nlinear_minmax_sentiment builder: scaled target, scaled past covariates including
# sentiment, and the holiday future covariates.
model_nlinear_minmax_sentiment = nlinear.nlinear_minmax_sentiment(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# nlinear_minmax_sentiment_opt builder on the sentiment-variant inputs.
model_nlinear_minmax_sentiment_opt = nlinear.nlinear_minmax_sentiment_opt(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# tft_sentiment_opt builder on the sentiment-variant inputs.
model_tft_sentiment_opt = tft.tft_sentiment_opt(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# tft_custom_loss builder on the sentiment-variant inputs.
model_tft_custom_loss = tft.tft_custom_loss(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# dlinear_sentiment builder on the sentiment-variant inputs.
model_dlinear_sentiment = dlinear.dlinear_sentiment(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# nlinear_myloss builder on the sentiment-variant inputs.
model_nlinear_myloss = nlinear.nlinear_myloss(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

In [None]:
# nlinear_minmax_sentiment_opt_updated builder on the sentiment-variant inputs.
model_nlinear_minmax_sentiment_opt_updated = nlinear.nlinear_minmax_sentiment_opt_updated(
    INPUT_CHUNK,
    OUTPUT_CHUNK,
    RANDOM,
    callbacks,
    target_train_sent_scaled,
    target_val_sent_scaled,
    past_train_sent_scaled,
    past_val_sent_scaled,
    future_train,
    future_val,
)

# Forecasts

In [None]:
# Historical predictions on the validation data, one per model.
# Models trained without covariates only receive the target series; the others
# additionally get the matching past covariates and the shared future covariates.

# --- target only ---
hist_baseline = historical_predictions(
    model_baseline, target_val,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
)

hist_def = historical_predictions(
    model_nlinear_default, target_val,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
)

hist_mm = historical_predictions(
    model_nlinear_minmax, target_val_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
)

# --- price covariates, no sentiment ---
hist_mm_cov = historical_predictions(
    model_nlinear_minmax_cov, target_val_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
    covariates=True,
    past=past_val_scaled,
    future=future_val,
)

# --- covariates including sentiment ---
hist_sentiment = historical_predictions(
    model_nlinear_minmax_sentiment, target_val_sent_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
    covariates=True,
    past=past_val_sent_scaled,
    future=future_val,
)

# Disabled runs (same call shape as above):
# hist_sentiment_opt = historical_predictions(
#     model_nlinear_minmax_sentiment_opt, target_val_sent_scaled,
#     INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
#     covariates=True, past=past_val_sent_scaled, future=future_val,
# )
# hist_tft_sentiment_opt = historical_predictions(
#     model_tft_sentiment_opt, target_val_sent_scaled,
#     INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
#     covariates=True, past=past_val_sent_scaled, future=future_val,
# )
# hist_dlinear_sentiment = historical_predictions(
#     model_dlinear_sentiment, target_val_sent_scaled,
#     INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
#     covariates=True, past=past_val_sent_scaled, future=future_val,
# )

hist_nlinear_myloss = historical_predictions(
    model_nlinear_myloss, target_val_sent_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
    covariates=True,
    past=past_val_sent_scaled,
    future=future_val,
)

hist_tft_custom_loss = historical_predictions(
    model_tft_custom_loss, target_val_sent_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
    covariates=True,
    past=past_val_sent_scaled,
    future=future_val,
)

hist_sentiment_opt_updated = historical_predictions(
    model_nlinear_minmax_sentiment_opt_updated, target_val_sent_scaled,
    INPUT_CHUNK, OUTPUT_CHUNK, RETRAIN, LAST,
    covariates=True,
    past=past_val_sent_scaled,
    future=future_val,
)

In [None]:
def diplay_prediction(hist, target, inverse=False, scaler=None, start=30):
    """Plot a forecast against the first target series and print its errors.

    Parameters
    ----------
    hist
        Forecast to show; must provide ``.plot(label=...)``.
    target
        Indexable collection of series. Only ``target[0]`` is plotted and scored.
    inverse : bool
        When True, ``hist`` and ``target`` are first passed through
        ``inverse_func(scaler, hist, target)`` and the results are used instead.
    scaler
        Scaler handed to ``inverse_func``; required when ``inverse`` is True.
    start : int
        Number of leading points of ``target[0]`` left out of the "true" curve.
        Defaults to 30, the value that used to be hard-coded here
        (presumably INPUT_CHUNK + OUTPUT_CHUNK = 25 + 5 -- TODO confirm).

    Returns
    -------
    The forecast that was plotted (the output of ``inverse_func`` when
    ``inverse`` is True, otherwise ``hist`` itself).

    Raises
    ------
    ValueError
        If ``inverse`` is True but no ``scaler`` was supplied.
    """
    if inverse:
        if scaler is None:
            # Fail early with a clear message instead of deep inside inverse_func.
            raise ValueError("inverse=True requires a scaler")
        hist, target = inverse_func(scaler, hist, target)

    reference = target[0]

    hist.plot(label='predict')
    reference[start:].plot(label='true')

    # Errors are computed against the full reference series, not the plotted slice.
    error_print(reference, hist)

    return hist

### Baseline

In [None]:
# The baseline was fitted on unscaled targets, so no inverse transform is needed.
# Assign back to `hist_baseline` (the former `hist_baselinee` was a typo that only
# created a stray variable), matching the other display cells.
hist_baseline = diplay_prediction(hist_baseline, target_val, inverse=False)

### Normalization

In [None]:
# The default model was built on unscaled targets, so its forecast is plotted and scored as-is.
hist_def = diplay_prediction(hist_def, target_val, inverse=False)

### MinMax

In [None]:
# Forecast and target are passed through inverse_func with scaler_target before plotting/scoring.
# NOTE(review): hist_mm is rebound to the returned series, so re-running this cell
# presumably feeds an already-converted forecast back in -- restart and run all instead.
hist_mm = diplay_prediction(hist_mm, target_val_scaled, inverse=True, scaler=scaler_target)

### Covariates

In [None]:
# Forecast and target are passed through inverse_func with scaler_target before plotting/scoring.
# NOTE(review): hist_mm_cov is rebound to the returned series, so re-running this cell
# presumably feeds an already-converted forecast back in.
hist_mm_cov = diplay_prediction(hist_mm_cov, target_val_scaled, inverse=True, scaler=scaler_target)

### Sentiment

In [None]:
# Sentiment variant: uses the scaler fitted on the sentiment-variant target.
# NOTE(review): hist_sentiment is rebound to the returned series, so re-running this cell
# presumably feeds an already-converted forecast back in.
hist_sentiment = diplay_prediction(hist_sentiment, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

### Sentiment opt

In [None]:
# hist_sentiment_opt = diplay_prediction(hist_sentiment_opt, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

### TFT opt

In [None]:
# hist_tft_sentiment_opt = diplay_prediction(hist_tft_sentiment_opt, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

### DLinear

In [None]:
# hist_dlinear_sentiment = diplay_prediction(hist_dlinear_sentiment, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

### My_loss

In [None]:
# Shows the TFT custom-loss forecast after passing it through inverse_func.
# NOTE(review): hist_nlinear_myloss from the forecasts cell is not displayed by any
# visible cell -- confirm whether it should be shown here as well.
# NOTE(review): the name is rebound to the returned series, so re-running this cell
# presumably feeds an already-converted forecast back in.
hist_tft_custom_loss = diplay_prediction(hist_tft_custom_loss, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

### Sentiment updated EMA

In [None]:
# Shows the updated sentiment-model forecast after passing it through inverse_func.
# NOTE(review): the name is rebound to the returned series, so re-running this cell
# presumably feeds an already-converted forecast back in.
hist_sentiment_opt_updated = diplay_prediction(hist_sentiment_opt_updated, target_val_sent_scaled, inverse=True, scaler=scaler_target_sent)

# DELIMITER

In [None]:
# Compare selected forecasts on the 110-140 window (presumably start/end positions
# within the validation series -- confirm against display_prediction_part).
display_prediction_part(
    target_val,
    110,
    140,
    {
        'baseline': hist_baseline,
        'default': hist_def,
        # 'minmax': hist_mm,
        # 'covariate': hist_mm_cov,
        'sentiment': hist_sentiment,
        # 'sentiment_opt': hist_sentiment_opt,
        # 'tft': hist_tft_sentiment_opt,
        # 'dlinear': hist_dlinear_sentiment,
        # 'sentiment_myloss': hist_tft_custom_loss,
        # 'sentiment EMA': hist_sentiment_opt_updated
    },
)

In [None]:
# Compare selected forecasts on the 130-145 window (presumably start/end positions
# within the validation series -- confirm against display_prediction_part).
display_prediction_part(
    target_val,
    130,
    145,
    {
        'baseline': hist_baseline,
        'default': hist_def,
        # 'minmax': hist_mm,
        # 'covariate': hist_mm_cov,
        'sentiment': hist_sentiment,
        # 'sentiment_opt': hist_sentiment_opt,
        # 'tft': hist_tft_sentiment_opt,
        # 'dlinear': hist_dlinear_sentiment,
        # 'sentiment_myloss': hist_tft_custom_loss,
        'sentiment EMA': hist_sentiment_opt_updated,
    },
)

In [None]:
# Compare selected forecasts on the 180-195 window (presumably start/end positions
# within the validation series -- confirm against display_prediction_part).
display_prediction_part(
    target_val,
    180,
    195,
    {
        'baseline': hist_baseline,
        'default': hist_def,
        # 'minmax': hist_mm,
        # 'covariate': hist_mm_cov,
        'sentiment': hist_sentiment,
        # 'sentiment_opt': hist_sentiment_opt,
        # 'tft': hist_tft_sentiment_opt,
        # 'dlinear': hist_dlinear_sentiment,
        # 'sentiment_myloss': hist_tft_custom_loss,
        'sentiment EMA': hist_sentiment_opt_updated,
    },
)

In [None]:
# Compare selected forecasts on the 220-240 window (presumably start/end positions
# within the validation series -- confirm against display_prediction_part).
display_prediction_part(
    target_val,
    220,
    240,
    {
        'baseline': hist_baseline,
        'default': hist_def,
        # 'minmax': hist_mm,
        # 'covariate': hist_mm_cov,
        'sentiment': hist_sentiment,
        # 'sentiment_opt': hist_sentiment_opt,
        # 'tft': hist_tft_sentiment_opt,
        # 'dlinear': hist_dlinear_sentiment,
        # 'sentiment_myloss': hist_tft_custom_loss,
        'sentiment EMA': hist_sentiment_opt_updated,
    },
)

In [None]:
# Compare selected forecasts on the 260-285 window (presumably start/end positions
# within the validation series -- confirm against display_prediction_part).
display_prediction_part(
    target_val,
    260,
    285,
    {
        'baseline': hist_baseline,
        'default': hist_def,
        # 'minmax': hist_mm,
        # 'covariate': hist_mm_cov,
        'sentiment': hist_sentiment,
        # 'sentiment_opt': hist_sentiment_opt,
        # 'tft': hist_tft_sentiment_opt,
        # 'dlinear': hist_dlinear_sentiment,
        # 'sentiment_myloss': hist_tft_custom_loss,
        'sentiment EMA': hist_sentiment_opt_updated,
    },
)

There is little difference between MinMax scaling and the normalization introduced in the paper for stock prices.

We will stick with MinMax scaling because it gives slightly better accuracy.