In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:

import math
import pickle

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import statsmodels.api as sm
from scipy.special import expit
from scipy import stats
import seaborn as sns


from sklearn.utils import shuffle as sklearn_shuffle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, LSTM, Bidirectional, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


# from misc_modules import dm_test, plot_double_standard


2024-09-24 18:10:40.262355: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-24 18:10:40.274141: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-24 18:10:40.277996: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-24 18:10:40.286986: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
etfs = ['ICLN', 'PBD', 'QCLN']  # ETF symbols
sequence_size = 5  # Number of time steps in sequence
cross_window = 3  # Number of cross-validation windows

lags = [1]
predType = 'ahead_Return'
predLabel = 'Log-Return'

pred_size=250
model_name = f'reg-{predType}'

save_path = '../../results'
data_path = '../../data'
# 'GT Sent', 'INV Sent', 'GT_VAL_SENT', 'INV_VAL_SENT',
sent_dict =  {
    'SENT': [ 'log_ovx', 'log_return', 'log_navR', 'GT Sent', 'INV Sent', 'd1-inv','d2-gt'],
    'NO_SENT': [ 'log_ovx', 'log_return', 'log_navR']
}


In [4]:
def root_mean_squared_loss(y_true,y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

def huber_loss(y_true, y_pred, delta=1.0):
    return tf.keras.losses.Huber(delta=delta)(y_true, y_pred)

def mean_squared_log_error(y_true, y_pred):
    return tf.keras.losses.MeanSquaredLogarithmicError()(y_true, y_pred)


class model:
    def __init__(self, model_type, input_shape=None):
        self.model_type = model_type
        self.input_shape = input_shape
        self.model = self._choose_model()

    def _choose_model(self):
        """Choose and instantiate the model based on the provided model type."""
        if self.model_type == 'svr':
            return SVR(kernel='rbf', C=1.0, gamma='scale')
        elif self.model_type == 'random_forest':
            return RandomForestRegressor(n_estimators=100, max_depth=None, min_samples_split=2, random_state=42)
        elif self.model_type == 'xgboost':
            return xgb.XGBRegressor(learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8, random_state=42)
        elif self.model_type == 'lightgbm':
            return lgb.LGBMRegressor(num_leaves=31, learning_rate=0.01, n_estimators=100, bagging_fraction=0.8, random_state=42)
        elif self.model_type == 'catboost':
            return CatBoostRegressor(iterations=500, depth=6, learning_rate=0.1, verbose=0, random_state=42)
        elif self.model_type == 'cnnlstm':
            if self.input_shape is None:
                raise ValueError("input_shape must be provided for cnnlstm model.")
            return self._build_cnn_lstm_model()
        else:
            raise ValueError(f"Invalid model type: {self.model_type}")

    def _build_cnn_lstm_model(self):
        """Build the CNN-LSTM model for time series or sequence data."""
        Timesteps, No_Features = self.input_shape
        model = Sequential()
        model.add(Conv1D(filters=32, kernel_size=1, activation='relu', input_shape=(Timesteps, No_Features)))
        model.add(MaxPooling1D(pool_size=1))
        model.add(LSTM(32, activation='relu'))
        model.add(Flatten())
        model.add(Dense(8))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss=huber_loss, metrics=['mae'])
        return model

    def fit(self, X_train, y_train,epochs=100, batch_size=32, validation_split=0.2,shuffle=True):
        """Fit the model depending on the type."""

        
        if self.model_type in ['svr', 'random_forest', 'xgboost', 'lightgbm', 'catboost']:
            if shuffle:
                X_train, y_train = sklearn_shuffle(X_train, y_train, random_state=42)
            self.model.fit(X_train, y_train)
        elif self.model_type == 'cnnlstm':
            X_train = X_train.reshape(-1, *self.input_shape)
            # early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            self.model.fit(X_train, y_train, epochs=epochs, verbose=False,shuffle=shuffle)

    def predict(self, X):
        """Predict using the chosen model."""
        if self.model_type in ['svr', 'random_forest', 'xgboost', 'lightgbm', 'catboost']:
            return self.model.predict(X)
        elif self.model_type == 'cnnlstm':
            X = X.reshape(-1, *self.input_shape)
            return self.model.predict(X, verbose=False)



In [5]:
def fetch(file, lag):
    data = pd.read_csv(f"{data_path}/{file}/{file}_INPUT.csv")
    data['ahead_Return'] = data['log_return'].shift(-1 * lag)
    data['ahead_vol'] = data['Garchvol'].shift(-1 * lag)
    data['ahead_mvol'] = data['MAvol'].shift(-1*(lag))
    data = data[:-1 * lag]  # Drop rows corresponding to lag
    data.reset_index(drop=True, inplace=True)
    return data

# Create sequences of input data
def sequences(X, y, timesteps):
    """
    Generate sequences for time series models.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    alpha, beta = [], []
    n = timesteps
    for i in range(X.shape[0]):
        if i < n - 1:
            continue
        alpha.append(X[i - (n - 1):i + 1])
        beta.append(y[i])

    return np.asarray(alpha), np.asarray(beta)


In [6]:

# Define helper functions
def concat_results_and_mean(arrays):
    """
    Concatenate and compute mean across all arrays.
    """
    new_arrays = [np.array(single_arr).reshape(-1) for single_arr in arrays]
    mean_array = np.mean(new_arrays, axis=0)
    return mean_array

def getanalysis(y_true, y_pred):
    """
    Calculate MAE and Directional Accuracy.
    """
    mae = mean_absolute_error(y_true, y_pred)
    da = directional_accuracy(y_true, y_pred)
    return mae, da

def directional_accuracy(y_true, y_pred):
    """
    Calculate Directional Accuracy (DA).
    """

    correct_directions = np.sign(y_true) == np.sign(y_pred)
    # correct_directions = np.sign(y_true[1:] - y_true[:-1]) == np.sign(y_pred[1:] - y_pred[:-1])
    
    return np.mean(correct_directions)


In [7]:

def analyze_etfs(etfs, predType, data_path):
    for etf in etfs:
        # Fetch the data
        data = fetch(etf, 1)
        
        # Display basic info about the dataset
        # print(f"\nBasic Info for {etf}:\n", data.info())

        # Display descriptive statistics for the specified prediction type
        print(f"\nDescriptive Statistics for '{predType}' column in {etf}:\n")
        stats = pd.DataFrame(data[predType]).describe()
        print(stats)

        # Visualize distribution of the specified prediction type using a histogram
        plt.figure(figsize=(7, 6))
        sns.histplot(data[predType], kde=True, color='blue')
        plt.title(f'Distribution of {predType} for {etf}')
        plt.show()

        # Visualize boxplot for the specified prediction type
        plt.figure(figsize=(7, 6))
        sns.boxplot(x=data[predType], color='blue')
        plt.title(f'Boxplot for {predType} for {etf}')
        plt.show()

# Call the function
# analyze_etfs(etfs, predType, data_path)

In [8]:
def sliding_window_train(model,X ,Y , train_window=500, test_window = 10,  pred_size=pred_size,sequence_size=sequence_size) :

    result = []

    # .reshape(X[:-pred_size].shape[0],1,sequence_size,-1)
    model.fit(X[:-pred_size], Y[:-pred_size], epochs=200)

    if model.model_type == 'cnnlstm' : 
        # tw_start and tw_end , means training window start index and end index
        for tw_end in range(X.shape[0] - pred_size, X.shape[0], 10):
            tw_start = tw_end - train_window
    
            X_train, y_train = X[tw_start:tw_end], Y[tw_start:tw_end]
            # X_train = X_train.reshape(X_train.shape[0],1,sequence_size,-1)
            model.fit(X_train, y_train, epochs=50)
    
            test = X[tw_end:tw_end+test_window]  #.reshape(X[tw_end:tw_end+test_window].shape[0],1,sequence_size,-1)
            preds = model.predict(test)
            result.extend(preds)
            print(tw_end, end=' ')
    else :
        result.extend(model.predict(X[-pred_size:]))

    rmse_val = root_mean_squared_loss(Y[-pred_size:].reshape(-1), np.array(result).reshape(-1))

    print(f"model RMSE = {rmse_val}")

    return np.array(result), model

In [9]:
def train(etf, sequence_size, cross_window, model_type, sent_cols, no_sent_cols):
    lag = 1
    data = fetch(etf, lag) 

    SENT_X = data[sent_cols]
    # Y = data[[predType]]  
    NO_SENT_X = data[no_sent_cols]
    
    Y = np.array(data[[predType]])  

    SENT_X, SENT_Y = sequences(SENT_X, Y, timesteps=sequence_size)
    NO_SENT_X, NO_SENT_Y = sequences(NO_SENT_X, Y, timesteps=sequence_size)


    sent_shape = SENT_X.shape[1:]
    no_sent_shape = NO_SENT_X.shape[1:]

    sent_model = model(model_type,sent_shape)
    no_sent_model = model(model_type,no_sent_shape)

        # machine learning regression models
    SENT_X = SENT_X.reshape(SENT_X.shape[0], -1)
    NO_SENT_X = NO_SENT_X.reshape(NO_SENT_X.shape[0], -1)
    SENT_Y = SENT_Y.reshape(-1,)
    NO_SENT_Y = NO_SENT_Y.reshape(-1,)


    Y_PRED = Y[-pred_size:]  # Actual values for the prediction window
    sent_predictions, no_sent_predictions = [], []
    act_values = Y_PRED  # Actual values for this window
    
    # Perform cross validation over cross_window
    for k in range(cross_window):

        # Train and predict for SENT model
        sent_pred, sent_model = sliding_window_train(sent_model,SENT_X, SENT_Y)
        no_sent_pred, no_sent_model = sliding_window_train(no_sent_model,NO_SENT_X, NO_SENT_Y)
        
        sent_predictions.append(sent_pred)
        no_sent_predictions.append(no_sent_pred)

        mae_sent_temp, da_sent_temp = getanalysis(act_values, sent_pred)
        mae_no_sent_temp, da_no_sent_temp = getanalysis(act_values, no_sent_pred)
    
        # Print metrics for the current ETF
        print(etf , {
            "sent": {"mae": mae_sent_temp, "da": da_sent_temp},
            "no_sent": {"mae": mae_no_sent_temp, "da": da_no_sent_temp}
        })
    
        mean_sent = (sent_predictions[-1]) 
        mean_no_sent = (no_sent_predictions[-1])  

    # Get prediction metrics for SENT and NO_SENT
    mae_sent, da_sent = getanalysis(act_values, mean_sent)
    mae_no_sent, da_no_sent = getanalysis(act_values, mean_no_sent)

    # Print metrics for the current ETF
    print(etf , {
        "sent": {"mae": mae_sent, "da": da_sent},
        "no_sent": {"mae": mae_no_sent, "da": da_no_sent}
    })
    
    # Return the results for this ETF
    return {
        "act" : act_values,
        "sent": {"predictions": mean_sent, "mae": mae_sent, "da": da_sent},
        "no_sent": {"predictions": mean_no_sent, "mae": mae_no_sent, "da": da_no_sent}
    }

# Loop through ETFs and aggregate results
def run_all_etfs(etfs, model_type, sequence_size=sequence_size, cross_window=cross_window):
    sent_results, no_sent_results = [], []
    actual_array = []
    
    for etf in etfs:
        print(f"Training for ETF: {etf}")
        sent_cols = sent_dict['SENT']  # Columns for SENT model
        no_sent_cols = sent_dict['NO_SENT']  # Columns for NO_SENT model

        # Train and get results for the ETF
        result = train(etf, sequence_size, cross_window, model_type, sent_cols, no_sent_cols)
        
        # Store the predictions and actual values
        sent_results.append(result['sent']['predictions'])
        no_sent_results.append(result['no_sent']['predictions'])
        actual_array.append(result['act'])  # Collect actual values

    # Concatenate results across all ETFs
    sent_results = np.concatenate(sent_results, axis=0)
    no_sent_results = np.concatenate(no_sent_results, axis=0)
    actual_array = np.concatenate(actual_array, axis=0)
    
    # Calculate overall metrics
    mae_sent, da_sent = getanalysis(actual_array, sent_results)
    mae_no_sent, da_no_sent = getanalysis(actual_array, no_sent_results)

    # Print combined metrics
    print("Combined results:")
    print(f"SENT - MAE: {mae_sent}, DA: {da_sent}")
    print(f"NO_SENT - MAE: {mae_no_sent}, DA: {da_no_sent}")
    
    return sent_results, no_sent_results, actual_array


In [10]:
model_type = 'cnnlstm'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-09-24 18:10:41.939959: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.014323566466674828
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.012681585687461154
ICLN {'sent': {'mae': 0.011281028677051679, 'da': 0.456}, 'no_sent': {'mae': 0.009219737613735459, 'da': 0.484}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.012498179771217923
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.013577573302722796
ICLN {'sent': {'mae': 0.00990782992257968, 'da': 0.512}, 'no_sent': {'mae': 0.009992238171274205, 'da': 0.568}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.012951488426167009
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.010959785555566284
PBD {'sent': {'mae': 0.00992335083071756, 'da': 0.452}, 'no_sent': {'mae': 0.008489079281547346, 'da': 0.472}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.011350221873430951
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.01047723484838885
PBD {'sent': {'mae': 0.008736870403851115, 'da': 0.564}, 'no_sent': {'mae': 0.007916124406060467, 'da': 0.544}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.01

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.018388096649607864
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.018265272495560588
QCLN {'sent': {'mae': 0.0142866319766242, 'da': 0.424}, 'no_sent': {'mae': 0.013541103520262908, 'da': 0.496}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.01584800675921326
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.016762576348723596
QCLN {'sent': {'mae': 0.011950078772029141, 'da': 0.58}, 'no_sent': {'mae': 0.012397871636908055, 'da': 0.596}}
1233 1243 1253 1263 1273 1283 1293 1303 1313 1323 1333 1343 1353 1363 1373 1383 1393 1403 1413 1423 1433 1443 1453 1463 1473 model RMSE = 0.01

In [11]:
model_type = 'svr'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN
model RMSE = 0.009826896494937355
model RMSE = 0.009826896494937355
ICLN {'sent': {'mae': 0.007641349786716184, 'da': 0.42}, 'no_sent': {'mae': 0.007641349786716184, 'da': 0.42}}
model RMSE = 0.009826896494937355
model RMSE = 0.009826896494937355
ICLN {'sent': {'mae': 0.007641349786716184, 'da': 0.42}, 'no_sent': {'mae': 0.007641349786716184, 'da': 0.42}}
model RMSE = 0.009826896494937355
model RMSE = 0.009826896494937355
ICLN {'sent': {'mae': 0.007641349786716184, 'da': 0.42}, 'no_sent': {'mae': 0.007641349786716184, 'da': 0.42}}
ICLN {'sent': {'mae': 0.007641349786716184, 'da': 0.42}, 'no_sent': {'mae': 0.007641349786716184, 'da': 0.42}}
Training for ETF: PBD
model RMSE = 0.009892896803594474
model RMSE = 0.009892896803594474
PBD {'sent': {'mae': 0.007975929651694157, 'da': 0.38}, 'no_sent': {'mae': 0.007975929651694157, 'da': 0.38}}
model RMSE = 0.009892896803594474
model RMSE = 0.009892896803594474
PBD {'sent': {'mae': 0.007975929651694157, 'da': 0.38}, 'no_s

In [12]:
model_type = 'random_forest'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN
model RMSE = 0.009957530952751176
model RMSE = 0.009814806126302275
ICLN {'sent': {'mae': 0.007637142190159056, 'da': 0.478688}, 'no_sent': {'mae': 0.007496233111414595, 'da': 0.476896}}
model RMSE = 0.009957530952751176
model RMSE = 0.009814806126302275
ICLN {'sent': {'mae': 0.007637142190159056, 'da': 0.478688}, 'no_sent': {'mae': 0.007496233111414595, 'da': 0.476896}}
model RMSE = 0.009957530952751176
model RMSE = 0.009814806126302275
ICLN {'sent': {'mae': 0.007637142190159056, 'da': 0.478688}, 'no_sent': {'mae': 0.007496233111414595, 'da': 0.476896}}
ICLN {'sent': {'mae': 0.007637142190159056, 'da': 0.478688}, 'no_sent': {'mae': 0.007496233111414595, 'da': 0.476896}}
Training for ETF: PBD
model RMSE = 0.008546313971301032
model RMSE = 0.008602485480459601
PBD {'sent': {'mae': 0.006516865701791441, 'da': 0.484}, 'no_sent': {'mae': 0.006549221999177294, 'da': 0.478176}}
model RMSE = 0.008546313971301032
model RMSE = 0.008602485480459601
PBD {'sent': {'mae': 0.0

In [13]:
model_type = 'xgboost'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN
model RMSE = 0.009643240647717176
model RMSE = 0.00970742442426629
ICLN {'sent': {'mae': 0.007465772474140169, 'da': 0.470624}, 'no_sent': {'mae': 0.007508645500159895, 'da': 0.4648}}
model RMSE = 0.009643240647717176
model RMSE = 0.00970742442426629
ICLN {'sent': {'mae': 0.007465772474140169, 'da': 0.470624}, 'no_sent': {'mae': 0.007508645500159895, 'da': 0.4648}}
model RMSE = 0.009643240647717176
model RMSE = 0.00970742442426629
ICLN {'sent': {'mae': 0.007465772474140169, 'da': 0.470624}, 'no_sent': {'mae': 0.007508645500159895, 'da': 0.4648}}
ICLN {'sent': {'mae': 0.007465772474140169, 'da': 0.470624}, 'no_sent': {'mae': 0.007508645500159895, 'da': 0.4648}}
Training for ETF: PBD
model RMSE = 0.008461430503803843
model RMSE = 0.008507158149044214
PBD {'sent': {'mae': 0.006364578746798856, 'da': 0.480672}, 'no_sent': {'mae': 0.006396364957151655, 'da': 0.46736}}
model RMSE = 0.008461430503803843
model RMSE = 0.008507158149044214
PBD {'sent': {'mae': 0.0063645787

In [14]:
model_type = 'lightgbm'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6395
[LightGBM] [Info] Number of data points in the train set: 1233, number of used features: 35
[LightGBM] [Info] Start training from score -0.000190
model RMSE = 0.009802291813497755
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 1233, number of used features: 15
[LightGBM] [Info] Start training from score -0.000190
model RMSE = 0.009786478609854357
ICLN {'sent': {'mae': 0.0075405494429502925, 'da': 0.475104}, 'no_sent': {'mae': 0.0075796632769853855, 'da': 0.474656}}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000348 seconds.


In [15]:
model_type = 'catboost'

sent_results, no_sent_results, actual_array = run_all_etfs(etfs, model_type)

Training for ETF: ICLN
model RMSE = 0.010350939047648932
model RMSE = 0.01056450062434126
ICLN {'sent': {'mae': 0.008054770336477541, 'da': 0.478688}, 'no_sent': {'mae': 0.008210034885370929, 'da': 0.474208}}
model RMSE = 0.010350939047648932
model RMSE = 0.01056450062434126
ICLN {'sent': {'mae': 0.008054770336477541, 'da': 0.478688}, 'no_sent': {'mae': 0.008210034885370929, 'da': 0.474208}}
model RMSE = 0.010350939047648932
model RMSE = 0.01056450062434126
ICLN {'sent': {'mae': 0.008054770336477541, 'da': 0.478688}, 'no_sent': {'mae': 0.008210034885370929, 'da': 0.474208}}
ICLN {'sent': {'mae': 0.008054770336477541, 'da': 0.478688}, 'no_sent': {'mae': 0.008210034885370929, 'da': 0.474208}}
Training for ETF: PBD
model RMSE = 0.00880482914762377
model RMSE = 0.008949021955244153
PBD {'sent': {'mae': 0.006781033017919884, 'da': 0.49648}, 'no_sent': {'mae': 0.006844851492084243, 'da': 0.480672}}
model RMSE = 0.00880482914762377
model RMSE = 0.008949021955244153
PBD {'sent': {'mae': 0.0067