In [None]:
from dotenv import load_dotenv
import datetime
import json
import sys
import os
import functools
import datetime
load_dotenv()

sys.path.append('../..')
from data_engineering.shared import read_parquet_from_s3

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import keras
import optuna
from keras import backend as K
import seaborn as sns
import matplotlib.pyplot as plt
from keras.layers import Input, BatchNormalization 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model, Model, model_from_json, Sequential
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout, MultiHeadAttention, Attention, RepeatVector, Conv1D, Flatten, GRU
from optuna.integration import TFKerasPruningCallback
import pandas_market_calendars as mcal
import gc
from keras import backend as K

In [None]:
def preprocess_data(ticker, seq_length=None):
    """Load one ticker's table and convert it into scaled rolling windows.

    The frame returned by ``read_parquet_from_s3(ticker)`` is indexed by its
    ``t`` column, its ``embedded_text`` column is expanded into one
    ``embed_<i>`` column per element, and ``target`` is moved to the last
    column. All columns are then min-max scaled and cut into windows of
    ``seq_length`` consecutive rows: the first ``seq_length - 1`` rows of a
    window (all columns) are the model input, and the ``target`` value of the
    window's final row is the label.

    Args:
        ticker: Identifier forwarded to ``read_parquet_from_s3``.
        seq_length: Number of rows per window. Defaults to the notebook-level
            ``lookback`` setting when omitted.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X, y, scaler, timestamps)``
        where the train/test arrays are the chronological first 80% / last 20%
        of the windows, ``scaler`` is the fitted ``MinMaxScaler`` and
        ``timestamps`` holds the index value of each window's final row.

    Raises:
        ValueError: If ``seq_length`` is smaller than 2 (a window needs at
            least one input row plus the label row).
    """
    if seq_length is None:
        # Backward compatible: fall back to the notebook-level configuration.
        seq_length = lookback
    if seq_length < 2:
        raise ValueError(f"seq_length must be at least 2, got {seq_length}")

    df = read_parquet_from_s3(ticker)
    df = df.set_index('t')

    # Expand the per-row embedding into individual numeric columns.
    df_embedded = df['embedded_text'].apply(pd.Series)
    df_embedded = df_embedded.rename(columns=lambda x: 'embed_' + str(x))
    df = pd.concat([df.drop('embedded_text', axis=1), df_embedded], axis=1)

    # Keep `target` as the last column; the windowing and the inverse
    # transform in the plotting code both rely on that position.
    cols = list(df.columns)
    cols.remove('target')
    cols.append('target')
    df = df[cols]

    # NOTE(review): the scaler is fitted on train and test rows together, so
    # test-period minima/maxima leak into training. Left unchanged because the
    # returned scaler is reused to invert predictions over the full series.
    scaler = MinMaxScaler()
    scaled_df = scaler.fit_transform(df.values)

    def create_rolling_sequences_with_timestamps(data, timestamps, seq_length):
        """Slice ``data`` into every full window of ``seq_length`` rows."""
        X, y, ts = [], [], []
        # `+ 1` so the window ending on the very last row is included too;
        # without it the most recent observation is never used as a label.
        for i in range(len(data) - seq_length + 1):
            seq = data[i: i + seq_length]
            X.append(seq[:-1])
            y.append(seq[-1, -1])
            ts.append(timestamps[i + seq_length - 1])
        return np.array(X), np.array(y), np.array(ts)

    X, y, timestamps = create_rolling_sequences_with_timestamps(scaled_df, df.index.values, seq_length)

    # Split on the number of windows (not raw rows) so the hold-out set is
    # really the final 20% of the samples.
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    return X_train, y_train, X_test, y_test, X, y, scaler, timestamps

In [None]:
@keras.saving.register_keras_serializable()
def combined_metric(y_true, y_pred):
    """Single score mixing error size, fit quality and directional agreement.

    Computed over the values passed in as::

        rmse / range + 0.5 * mae / range - 1.5 * r2 - directional_accuracy

    where ``range`` is ``max(y_true) - min(y_true)`` and "direction" is the
    sign of each value relative to the mean of its own tensor. Lower is
    better.

    Args:
        y_true: Ground-truth values (tensor or array-like).
        y_pred: Predicted values (tensor or array-like) with the same number
            of elements as ``y_true``.

    Returns:
        Scalar ``tf.float32`` tensor.
    """
    # Flatten both sides so a (batch,) label tensor and a (batch, 1)
    # prediction tensor are compared element-wise instead of broadcasting
    # to (batch, batch).
    y_true = tf.reshape(tf.convert_to_tensor(y_true, dtype=tf.float32), [-1])
    y_pred = tf.reshape(tf.convert_to_tensor(y_pred, dtype=tf.float32), [-1])
    eps = tf.keras.backend.epsilon()

    # Calculating RMSE
    rmse = tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

    # Calculating MAE
    mae = tf.reduce_mean(tf.abs(y_pred - y_true))

    # Calculating R-squared
    SS_res = tf.reduce_sum(tf.square(y_true - y_pred))
    SS_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    r2 = 1 - SS_res / (SS_tot + eps)

    # Calculating Directional Accuracy
    direction_true = tf.sign(y_true - tf.reduce_mean(y_true))
    direction_pred = tf.sign(y_pred - tf.reduce_mean(y_pred))
    directional_accuracy = tf.reduce_mean(tf.cast(tf.equal(direction_true, direction_pred), dtype=tf.float32))

    # Range used to normalise RMSE and MAE. The epsilon keeps the division
    # finite when every label in the batch is identical (for example a final
    # batch holding a single sample).
    y_range = tf.reduce_max(y_true) - tf.reduce_min(y_true) + eps
    combined = (rmse / y_range + 0.5 * mae / y_range - 1.5 * r2 - directional_accuracy)

    return combined

In [None]:
def create_model(input_shape, model_type, params):
    """Build the compiled network selected by ``model_type``.

    Args:
        input_shape: Shape tuple forwarded unchanged to the chosen builder.
        model_type: One of ``'lstm'``, ``'gru'``, ``'wavenet'``,
            ``'simple_transformer'`` or ``'stacked'``.
        params: Hyperparameter mapping forwarded unchanged to the builder.

    Returns:
        Whatever the selected ``create_*_model`` function returns.

    Raises:
        ValueError: If ``model_type`` matches none of the known names.
    """
    known_builders = (
        ('lstm', create_lstm_model),
        ('gru', create_gru_model),
        ('wavenet', create_wavenet_model),
        ('simple_transformer', create_simple_transformer_model),
        ('stacked', create_stacked_model),
    )
    for name, build in known_builders:
        if model_type == name:
            return build(input_shape, params)
    raise ValueError(f"Unknown model type: {model_type}")

# LSTM Model
def create_lstm_model(input_shape, params):
    """Two stacked LSTM layers followed by a single-unit dense output.

    Layer order: LSTM (returns sequences) -> BatchNormalization -> Dropout ->
    LSTM -> BatchNormalization -> Dense(1). Both LSTM layers use
    ``params['unit_number']`` units and L2 penalties of strength
    ``params['l2_strength']`` on kernel, recurrent and bias weights.

    Args:
        input_shape: Shape tuple given to the first LSTM as ``input_shape``.
        params: Mapping providing ``unit_number``, ``l2_strength`` and
            ``dropout_rate``.

    Returns:
        A ``Sequential`` model compiled with Adam (``clipnorm=1.0``), MSE
        loss and ``combined_metric`` as metric.
    """
    units = params['unit_number']

    def weight_penalties():
        # A fresh set of regularizer objects for every recurrent layer.
        strength = params['l2_strength']
        return dict(
            kernel_regularizer=l2(strength),
            recurrent_regularizer=l2(strength),
            bias_regularizer=l2(strength),
        )

    network = Sequential([
        LSTM(units, input_shape=input_shape, return_sequences=True, **weight_penalties()),
        BatchNormalization(),
        Dropout(params['dropout_rate']),
        LSTM(units, **weight_penalties()),
        BatchNormalization(),
        Dense(1),
    ])
    network.compile(optimizer=Adam(clipnorm=1.0), loss='mse', metrics=[combined_metric])
    return network

# GRU Model
def create_gru_model(input_shape, params):
    """Two stacked GRU layers followed by a single-unit dense output.

    Layer order: GRU (returns sequences) -> BatchNormalization -> Dropout ->
    GRU -> BatchNormalization -> Dense(1). Both GRU layers use
    ``params['unit_number']`` units and L2 penalties of strength
    ``params['l2_strength']`` on kernel, recurrent and bias weights.

    Args:
        input_shape: Shape tuple given to the first GRU as ``input_shape``.
        params: Mapping providing ``unit_number``, ``l2_strength`` and
            ``dropout_rate``.

    Returns:
        A ``Sequential`` model compiled with Adam (``clipnorm=1.0``), MSE
        loss and ``combined_metric`` as metric.
    """
    hidden_units = params['unit_number']

    def make_gru(**layer_options):
        # Each call creates its own three L2 regularizer instances.
        weight_decay = params['l2_strength']
        return GRU(
            hidden_units,
            kernel_regularizer=l2(weight_decay),
            recurrent_regularizer=l2(weight_decay),
            bias_regularizer=l2(weight_decay),
            **layer_options,
        )

    net = Sequential()
    net.add(make_gru(input_shape=input_shape, return_sequences=True))
    net.add(BatchNormalization())
    net.add(Dropout(params['dropout_rate']))
    net.add(make_gru())
    net.add(BatchNormalization())
    net.add(Dense(1))

    net.compile(optimizer=Adam(clipnorm=1.0), loss='mse', metrics=[combined_metric])
    return net

# WaveNet Model
def create_wavenet_model(input_shape, params):
    """Stack of causal, dilated Conv1D layers with a dense single-unit head.

    Builds ``params.get('n_layers', 2)`` repetitions of
    Conv1D(kernel size 2, causal padding, dilation ``2**depth``) ->
    BatchNormalization -> Dropout, then Flatten -> Dense(1).

    Args:
        input_shape: Shape tuple declared on the first convolution so the
            model is built immediately, as in the sibling builders.
        params: Mapping providing ``filter_number``, ``activation`` and
            ``dropout_rate``; ``n_layers`` is optional (default 2).

    Returns:
        A ``Sequential`` model compiled with Adam (``clipnorm=1.0``), MSE
        loss and ``combined_metric`` as metric.
    """
    optimizer = Adam(clipnorm=1.0)
    model = Sequential()
    for depth in range(params.get('n_layers', 2)):
        # Only the first layer declares the input shape; previously the
        # argument was ignored and the model stayed unbuilt until `fit`.
        first_layer_kwargs = {'input_shape': input_shape} if depth == 0 else {}
        model.add(Conv1D(
            params['filter_number'],
            2,
            dilation_rate=2 ** depth,  # dilation doubles with each layer
            padding='causal',
            activation=params['activation'],
            **first_layer_kwargs,
        ))
        model.add(BatchNormalization())
        model.add(Dropout(params['dropout_rate']))
    model.add(Flatten())
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss='mse', metrics=[combined_metric])
    return model

# Simple-Transformer Model
def create_simple_transformer_model(input_shape, params):
    """Self-attention block with a flattened dense single-unit head.

    The input is used as query, key and value of one ``MultiHeadAttention``
    layer; its output goes through Dropout, Flatten and Dense(1).

    Args:
        input_shape: Shape tuple for the model's ``Input`` layer.
        params: Mapping providing ``num_heads``, ``key_dim`` and
            ``dropout_rate``.

    Returns:
        A functional ``Model`` compiled with Adam (``clipnorm=1.0``), MSE
        loss and ``combined_metric`` as metric.
    """
    inputs = Input(shape=input_shape)

    self_attention = MultiHeadAttention(num_heads=params['num_heads'], key_dim=params['key_dim'])
    attended = self_attention(query=inputs, key=inputs, value=inputs)

    regularised = Dropout(params['dropout_rate'])(attended)
    flattened = Flatten()(regularised)
    prediction = Dense(1)(flattened)

    network = Model(inputs=inputs, outputs=prediction)
    network.compile(optimizer=Adam(clipnorm=1.0), loss='mse', metrics=[combined_metric])
    return network

# Stacked Model (Conv1D + LSTM + LSTM)
def create_stacked_model(input_shape, params):
    """Conv1D front end feeding two stacked LSTM layers and a dense output.

    Layer order: Conv1D -> BatchNormalization -> LSTM (returns sequences) ->
    BatchNormalization -> Dropout -> LSTM -> BatchNormalization -> Dense(1).
    The convolution's kernel size is ``params['kernel_size']`` capped at 4 and
    its activation defaults to ``'relu'`` when ``params`` has no
    ``activation`` entry.

    Args:
        input_shape: Shape tuple given to the Conv1D as ``input_shape``.
        params: Mapping providing ``filter_number``, ``kernel_size``,
            ``unit_number``, ``l2_strength`` and ``dropout_rate``;
            ``activation`` is optional.

    Returns:
        A ``Sequential`` model compiled with Adam (``clipnorm=1.0``), MSE
        loss and ``combined_metric`` as metric.
    """
    lstm_units = params['unit_number']

    def l2_options():
        # New regularizer objects per LSTM layer, all with the same strength.
        strength = params['l2_strength']
        return {
            'kernel_regularizer': l2(strength),
            'recurrent_regularizer': l2(strength),
            'bias_regularizer': l2(strength),
        }

    front_conv = Conv1D(
        filters=params['filter_number'],
        kernel_size=min(4, params['kernel_size']),
        input_shape=input_shape,
        activation=params.get('activation', 'relu'),
    )

    network = Sequential()
    network.add(front_conv)
    network.add(BatchNormalization())
    network.add(LSTM(lstm_units, return_sequences=True, **l2_options()))
    network.add(BatchNormalization())
    network.add(Dropout(params['dropout_rate']))
    network.add(LSTM(lstm_units, **l2_options()))
    network.add(BatchNormalization())
    network.add(Dense(1))

    network.compile(optimizer=Adam(clipnorm=1.0), loss='mse', metrics=[combined_metric])
    return network

In [None]:
def objective(trial, **kwargs):
    """Optuna objective: train one sampled architecture and score it.

    Samples a model type and a shared hyperparameter set from ``trial``,
    builds the model through ``create_model``, fits it with early stopping,
    learning-rate reduction, TensorBoard logging and Optuna pruning (all
    monitoring ``val_loss``), and returns the validation ``combined_metric``
    of the last epoch that ran.

    Reads the notebook-level settings ``log_base_path``, ``patience_lr`` and
    ``patience_early_stop``.

    Args:
        trial: The ``optuna`` trial used for sampling and pruning.
        **kwargs: Must contain ``X_train``, ``y_train``, ``X_test`` and
            ``y_test``.

    Returns:
        The final epoch's ``val_combined_metric`` value from the fit history.
    """
    X_train = kwargs['X_train']
    y_train = kwargs['y_train']
    X_test = kwargs['X_test']
    y_test = kwargs['y_test']

    model_type = trial.suggest_categorical('model_type', [
        'lstm', 'gru', 'wavenet', 'simple_transformer', 'stacked'
    ])

    # Every parameter is sampled on every trial, even if the chosen
    # architecture does not read it, so the search space stays fixed.
    params = {
        'unit_number': trial.suggest_int('unit_number', 20, 180),
        'dropout_rate': trial.suggest_float('dropout_rate', 0, 0.6),
        'batch_size': trial.suggest_categorical('batch_size', [5, 10, 20, 40, 50, 60, 80, 100]),
        'epochs': trial.suggest_int('epochs', 25, 100),
        'filter_number': trial.suggest_int('filter_number', 8, 512),
        'kernel_size': trial.suggest_int('kernel_size', 1, 3),
        'l2_strength': trial.suggest_float('l2_strength', 1e-6, 1e-1, log=True),
        'activation': trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh']),
        'num_heads': trial.suggest_int('num_heads', 1, 16),
        'key_dim': trial.suggest_int('key_dim', 1, 16),
        'lr_factor': trial.suggest_float('lr_factor', 0.1, 0.9, step=0.1),
        'min_lr': trial.suggest_float('min_lr', 1e-5, 1e-2, log=True),
    }

    # Take the shape from the data handed to this trial rather than from a
    # notebook global that may be stale or undefined.
    input_shape = (X_train.shape[1], X_train.shape[2])

    log_dir = f"{log_base_path}{model_type}-{trial.number}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # Single dispatch point shared with `train_best_model`; raises ValueError
    # on an unknown type instead of leaving `model` unbound.
    model = create_model(input_shape, model_type, params)

    callbacks_list = [
        EarlyStopping(monitor='val_loss', patience=patience_early_stop),
        ReduceLROnPlateau(monitor='val_loss', factor=params['lr_factor'], patience=patience_lr, min_lr=params['min_lr']),
        TensorBoard(log_dir=log_dir, histogram_freq=1),
        TFKerasPruningCallback(trial, 'val_loss'),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=params['epochs'],
        batch_size=params['batch_size'],
        verbose=1,
        callbacks=callbacks_list
    )

    # Keras stores validation metrics under the `val_` prefix. The unprefixed
    # key is the training metric, which would reward overfitting.
    combined_val_metric = history.history['val_combined_metric'][-1]

    return combined_val_metric

In [None]:
def train_best_model(X_train, y_train, X_test, y_test, study):
    """Rebuild and train the architecture that won the Optuna study.

    Uses ``study.best_params`` for the model type, the builder parameters,
    the number of epochs and the batch size. Training uses early stopping on
    ``val_loss`` (patience from the notebook-level ``patience_early_stop``)
    and, when the study tuned ``lr_factor`` and ``min_lr``, the same
    ``ReduceLROnPlateau`` schedule the trials were run with (patience from
    the notebook-level ``patience_lr``). The chosen parameters are printed.

    Args:
        X_train: Training inputs; dimensions 1 and 2 define the input shape.
        y_train: Training labels.
        X_test: Validation inputs.
        y_test: Validation labels.
        study: Finished ``optuna`` study exposing ``best_params``.

    Returns:
        Tuple ``(model, best_params)``.
    """
    best_params = study.best_params
    best_model_type = best_params['model_type']

    input_shape = (X_train.shape[1], X_train.shape[2])
    model = create_model(input_shape, best_model_type, best_params)

    callbacks = [EarlyStopping(monitor='val_loss', patience=patience_early_stop)]
    # The trials were trained with a learning-rate schedule driven by these
    # two tuned values. Without it the final fit would not match the
    # configuration that was actually selected.
    if 'lr_factor' in best_params and 'min_lr' in best_params:
        callbacks.append(ReduceLROnPlateau(
            monitor='val_loss',
            factor=best_params['lr_factor'],
            patience=patience_lr,
            min_lr=best_params['min_lr'],
        ))

    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=best_params['epochs'],
        batch_size=best_params['batch_size'],
        verbose=1,
        callbacks=callbacks
    )

    print("-" * 30)
    print(f"Best Model is: '{best_model_type}'")
    print("-" * 30)
    for key, value in best_params.items():
        if key != 'model_type':
            print(f"{key}: {value}")

    return model, best_params

In [None]:
def visualize_predictions(model, X, y, scaler, timestamps, ticker, best_model_type, time):
    """Plot real against predicted targets, full range and the latest 60 points.

    Predictions for ``X`` and the labels ``y`` are mapped back through
    ``scaler.inverse_transform`` by placing them in the last column next to
    the first time step's other columns. Two figures are shown; only the
    first (full range) is written to
    ``../images/networks/{ticker}_{best_model_type}-{time}.png``.

    Args:
        model: Object whose ``predict(X)`` output can be concatenated as a
            column.
        X: Input windows, indexed as ``X[:, 0, :-1]``.
        y: Scaled labels, one per window.
        scaler: Fitted scaler providing ``inverse_transform``.
        timestamps: x-axis values, one per window.
        ticker: Label used in the file name and the titles.
        best_model_type: Architecture name used in the file name and titles.
        time: Run identifier used in the file name and titles.
    """
    def back_to_original_units(scaled_column):
        # The scaler expects every column, so pad with the first step's
        # non-target columns and read the last column back out.
        padded = np.concatenate([X[:, 0, :-1], scaled_column], axis=1)
        return scaler.inverse_transform(padded)[:, -1]

    predicted = back_to_original_units(model.predict(X))
    actual = back_to_original_units(y.reshape(-1, 1))
    compare_df = pd.DataFrame({'Real': actual, 'Predicted': predicted})

    sns.set_style("whitegrid")
    real_colour = sns.color_palette("deep")[0]
    predicted_colour = sns.color_palette("deep")[1]

    def draw_comparison(x_values, frame, title):
        # Shared layout for both figures; the caller decides on save/show.
        plt.figure(figsize=(20, 10))
        sns.lineplot(x=x_values, y=frame['Real'], label='Real', color=real_colour, linewidth=2.5)
        sns.lineplot(x=x_values, y=frame['Predicted'], label='Predicted', color=predicted_colour, linewidth=2.5)

        plt.xlabel('Time Index (Daily)', fontsize=16)
        plt.ylabel('Delta (Close-Open) Price', fontsize=16)
        plt.legend(frameon=True, loc='upper right', fontsize='medium')
        plt.title(title, fontsize=20)

        sns.despine(left=True, bottom=True)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()

    # Figure 1: the whole series, saved to disk before being shown.
    draw_comparison(
        timestamps,
        compare_df,
        f"Actual vs. Predictions ['{best_model_type}' Model Architecture: {ticker}-{time}]",
    )
    plt.savefig(f"../images/networks/{ticker}_{best_model_type}-{time}.png", bbox_inches='tight')
    plt.show()

    # Figure 2: only the most recent points, shown but not saved.
    steps = 20 * 3
    draw_comparison(
        timestamps[-steps:],
        compare_df[-steps:],
        f"Actual vs. Predictions [{best_model_type}: {ticker}-{time}] (Last {steps} Trading Days)",
    )
    plt.show()

In [None]:
# --- Run configuration (read as globals by the functions defined above) ---
lookback = 5                          # rows per rolling window
patience_early_stop = 5               # EarlyStopping patience, in epochs
train_for_seconds = (60 * 60) * 0.5   # Optuna time budget per ticker (30 min)
max_trials = 25                       # Optuna trial cap per ticker
patience_lr = 5                       # ReduceLROnPlateau patience, in epochs

model_save_path = "../models/networks/"
# Single run stamp shared by every artefact written below.
time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

# Main loop
# To run every configured ticker instead of the single hard-coded one:
# for ticker in json.loads(os.getenv('TICKERS')):
for ticker in ['TSLA']:

    log_base_path = f"../logs/tensorboard/{ticker}_"
    X_train, y_train, X_test, y_test, X, y, scaler, timestamps = preprocess_data(ticker)

    # Hyperparameter tuning with Optuna (lower combined metric is better)
    study = optuna.create_study(direction='minimize')
    objective_with_data = functools.partial(objective, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    study.optimize(objective_with_data, n_trials=max_trials, timeout=train_for_seconds)

    # Train the best model; this also returns the winning parameter set.
    best_model, best_params = train_best_model(X_train, y_train, X_test, y_test, study)
    best_model_type = best_params['model_type']

    # Save the best model: architecture as JSON, weights separately.
    model_json = best_model.to_json()
    with open(f"{model_save_path}{ticker}_{best_model_type}_{time}.json", "w") as json_file:
        json_file.write(model_json)
    # NOTE(review): `.keras` is normally the suffix of whole-model archives;
    # confirm the installed Keras version accepts it for `save_weights`.
    best_model.save_weights(f"{model_save_path}{ticker}_{best_model_type}_{time}_weights.keras")

    # Visualize predictions. Keyword arguments keep ticker and model type in
    # the right slots; they were previously passed positionally in swapped
    # order, which mislabelled the saved image and both plot titles.
    visualize_predictions(
        best_model, X, y, scaler, timestamps,
        ticker=ticker,
        best_model_type=best_model_type,
        time=time,
    )

    # Cleanup: release Keras graph state and Python garbage between tickers.
    K.clear_session()
    gc.collect()