In [1]:
import pandas as pd

# Load the raw ^NDX price history and normalise the column names to lower case.
ndx_column_names = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume',
}
ndx_raw = pd.read_csv('data/^NDX_raw_data.csv').rename(columns=ndx_column_names)

# Row index at which the frame is split: rows before it become `data`,
# rows from it onwards are kept aside as `data_backup`.
ndx_split_row = 3524

data_backup = ndx_raw.iloc[ndx_split_row:]

data = ndx_raw.iloc[:ndx_split_row]
# Independent copy of the first segment.
data_copy = data.copy()

In [2]:
import numpy as np

## Creating sequences
def create_dataset(dataset, time_step=1, output_step=1):
    """Slice a series into sliding (input window, target window) pairs.

    Only column 0 of ``dataset`` is used. Window ``i`` takes rows
    ``i .. i + time_step - 1`` as input and the following ``output_step``
    rows as target.

    Parameters
    ----------
    dataset : 2-D array indexable as ``dataset[a:b, 0]``
        Series values, one row per time step.
    time_step : int, default 1
        Number of consecutive rows in each input window.
    output_step : int, default 1
        Number of consecutive rows in each target window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs of shape ``(n_windows, time_step)`` and targets of shape
        ``(n_windows, output_step)``. Both are empty arrays when the series
        is too short to hold a single complete pair.
    """
    # The last valid start index is len - time_step - output_step (its target
    # ends exactly on the final row), hence the "+ 1" for an inclusive bound.
    n_windows = len(dataset) - time_step - output_step + 1

    dataX, dataY = [], []
    for start in range(n_windows):
        split = start + time_step
        dataX.append(dataset[start:split, 0])
        dataY.append(dataset[split:split + output_step, 0])

    return np.array(dataX), np.array(dataY)

In [4]:
# --- windowing ---
period = 60          # length of each raw input window passed to create_dataset
trend_period = 14    # seasonal period used for the trend decomposition
output_step = 7      # number of future values predicted per window

# --- model ---
num_features = 1
units = 128
# Number of time steps fed to the LSTM: 60 - 14 = 46.
# NOTE(review): this presumably equals the count of non-NaN trend values left
# after decomposition when trend_period is even - confirm if trend_period changes.
input_period = period - trend_period

In [5]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Build the LSTM model layer by layer: one LSTM layer that reads
# `input_period` time steps of `num_features` values, followed by a dense
# head that emits `output_step` values.
model = Sequential()
model.add(LSTM(units=units, input_shape=(input_period, num_features)))
# Disabled alternative (stacked LSTMs with dropout), kept for reference:
#LSTM(units=units, return_sequences=True, input_shape=(period, num_features)),
#Dropout(0.2),
#LSTM(units=units, return_sequences=False),
#Dropout(0.2),
model.add(Dense(output_step))

# Compile with Adam and mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer='adam')

# ModelCheckpoint callback that writes weights only (not the full model).
# Because save_best_only=True, a file is written only for epochs where the
# monitored quantity improves, not after every epoch.
checkpoint = ModelCheckpoint(
    filepath='model_weights_3/model_weights_epoch_{epoch:02d}.h5',
    save_weights_only=True,
    save_best_only=True,
)

# Adding early stopping to prevent overfitting
#early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [6]:
# Epoch whose saved checkpoint is restored into `model`.
best_epoch_backup = 63

# Resolve the checkpoint file for that epoch, then load its weights.
weights_file = f'model_weights_3/model_weights_epoch_{best_epoch_backup:02d}.h5'
model.load_weights(weights_file)

print('Backup: Weigths for the best epoch has been loaded.')

Backup: Weigths for the best epoch has been loaded.


## Reading data for all stocks

In [7]:
import os
import re

# Read every '<SYMBOL>_*.csv' file in `directory` into a dict of DataFrames
# keyed by symbol, with the price columns renamed to lower case.
directory = 'data'

df_data = dict()

price_column_names = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume',
}

# Sorted so the symbol order (and therefore every later per-symbol loop) is
# the same on every run; os.listdir gives no ordering guarantee.
for file in sorted(os.listdir(directory)):
    # The symbol is everything before the first underscore in the file name.
    symbol_pattern = re.match(r'([^_]+)_', file)

    # Skip entries that cannot be price files (hidden files, checkpoint
    # folders, names without an underscore) instead of crashing on them.
    if symbol_pattern is None or not file.lower().endswith('.csv'):
        continue

    symbol = symbol_pattern.group(1)
    df_data[symbol] = pd.read_csv(os.path.join(directory, file)).rename(columns=price_column_names)

In [8]:
# Per-symbol sliding windows built from the 'close' column:
# inputs of length `period`, targets of length `output_step`.
dict_X_test, dict_y_test = {}, {}

for symbol, frame in df_data.items():
    # Double brackets keep the column 2-D, as create_dataset indexes [:, 0].
    close_values = frame[['close']].to_numpy()
    X_test, y_test = create_dataset(close_values, time_step=period, output_step=output_step)

    dict_X_test[symbol], dict_y_test[symbol] = X_test, y_test

In [9]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive seasonal decomposition of every input window, per symbol.
# dict_decompositions keeps the full decomposition results; dict_trends keeps
# only each window's trend component.
dict_decompositions = dict()
dict_trends = dict()

for symbol in df_data:
    # Use the shared `trend_period` setting rather than a second hard-coded 14,
    # so the decomposition cannot drift from the configuration cell.
    decompositions_test = np.array([
        seasonal_decompose(window, model='additive', period=trend_period)
        for window in dict_X_test[symbol]
    ])
    # The trend may contain NaN values, which a later cell filters out.
    trends_test = np.array([decomposition.trend for decomposition in decompositions_test])

    dict_decompositions[symbol] = decompositions_test
    dict_trends[symbol] = trends_test

In [10]:
# Remove the NaN entries from every trend window, per symbol.
dict_trends_dropna = {}

for symbol in df_data:
    trends_test_dropna = np.array(
        [trend[np.logical_not(np.isnan(trend))] for trend in dict_trends[symbol]]
    )

    dict_trends_dropna[symbol] = trends_test_dropna

In [11]:
from sklearn.preprocessing import MinMaxScaler

# Scale every trend window and every target window with its own MinMaxScaler.
# The fitted scalers are kept so values can be mapped back to the original
# scale later.
dict_scalers_trend, dict_scalers_target = {}, {}
dict_trends_scaled, dict_targets_scaled = {}, {}

for symbol in df_data:
    trend_windows = dict_trends_dropna[symbol]
    target_windows = dict_y_test[symbol]

    # One independent scaler per window.
    scaler_trend = [MinMaxScaler() for _ in range(trend_windows.shape[0])]
    # Target values are scaled here only so they can be compared with predictions.
    scaler_target = [MinMaxScaler() for _ in range(target_windows.shape[0])]

    # Each window is reshaped to a single-feature column before fitting.
    trends_test_scaled = np.array([
        scaler.fit_transform(window.reshape(-1, 1))
        for scaler, window in zip(scaler_trend, trend_windows)
    ])
    target_test_scaled = np.array([
        scaler.fit_transform(window.reshape(-1, 1))
        for scaler, window in zip(scaler_target, target_windows)
    ])

    dict_scalers_trend[symbol] = scaler_trend
    dict_scalers_target[symbol] = scaler_target
    dict_trends_scaled[symbol] = trends_test_scaled
    dict_targets_scaled[symbol] = target_test_scaled

In [13]:
# EVALUATION CAN BE MADE HERE IF WANTED

In [None]:
# Predict the next `output_step` trend values for every scaled window of every
# symbol, then map each prediction back to the original scale with the scaler
# fitted on that same window.
dict_predictions = dict()

for symbol in df_data:
    print(f'predicting for {symbol}')

    scaled_windows = dict_trends_scaled[symbol]
    n_windows = scaled_windows.shape[0]

    if n_windows == 0:
        # Nothing to predict for this symbol; keep the empty-array result.
        trend_predictions = np.array([])
        dict_predictions[symbol] = trend_predictions
        continue

    # One batched call per symbol instead of one model.predict call per
    # window: predict is built for batches, and per-call overhead dominates
    # when it is invoked with a single sample at a time.
    batch_predictions = model.predict(
        scaled_windows.reshape(n_windows, input_period, num_features),
        verbose=0,
    )

    # Each window was scaled independently, so each row is inverse-transformed
    # with its own scaler. Rows are reshaped to (1, output_step) so the result
    # has shape (n_windows, 1, output_step).
    trend_predictions = np.array([
        dict_scalers_trend[symbol][j].inverse_transform(batch_predictions[j].reshape(1, -1))
        for j in range(n_windows)
    ])

    dict_predictions[symbol] = trend_predictions

1 for AAPL
2 for AAPL
3 for AAPL
4 for AAPL
5 for AAPL
6 for AAPL
7 for AAPL
8 for AAPL
9 for AAPL
10 for AAPL
11 for AAPL
12 for AAPL
13 for AAPL
14 for AAPL
15 for AAPL
16 for AAPL
17 for AAPL
18 for AAPL
19 for AAPL
20 for AAPL
21 for AAPL
22 for AAPL
23 for AAPL
24 for AAPL
25 for AAPL
26 for AAPL
27 for AAPL
28 for AAPL
29 for AAPL
30 for AAPL
31 for AAPL
32 for AAPL
33 for AAPL
34 for AAPL
35 for AAPL
36 for AAPL
37 for AAPL
38 for AAPL
39 for AAPL
40 for AAPL
41 for AAPL
42 for AAPL
43 for AAPL
44 for AAPL
45 for AAPL
46 for AAPL
47 for AAPL
48 for AAPL
49 for AAPL
50 for AAPL
51 for AAPL
52 for AAPL
53 for AAPL
54 for AAPL
55 for AAPL
56 for AAPL
57 for AAPL
58 for AAPL
59 for AAPL
60 for AAPL
61 for AAPL
62 for AAPL
63 for AAPL
64 for AAPL
65 for AAPL
66 for AAPL
67 for AAPL
68 for AAPL
69 for AAPL
70 for AAPL
71 for AAPL
72 for AAPL
73 for AAPL
74 for AAPL
75 for AAPL
76 for AAPL
77 for AAPL
78 for AAPL
79 for AAPL
80 for AAPL
81 for AAPL
82 for AAPL
83 for AAPL
84 for AAPL
8