## Loading

In [1]:
%load_ext autoreload
%autoreload 2

In [84]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
import helper 
from models_stationary import *
import pywt
from torch.optim.lr_scheduler import StepLR

current_path = os.getcwd()

random_state = helper.RANDOM_STATE

# Define a context manager to temporarily suppress FutureWarnings
class SuppressFutureWarnings:
    def __enter__(self):
        warnings.filterwarnings('ignore', category=FutureWarning)
    
    def __exit__(self, exc_type, exc_value, traceback):
        warnings.filterwarnings('default')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
STOCKS = ["TSLA", "AAPL", 'QQQ', "SPY", "MSFT", "AMZN", "GOOG", "DIA", "^IRX"]
START_DATE = helper.START_DATE
END_DATE = helper.END_DATE
stock_data = {}
MAs = [5, 10, 20, 50, 100, 200]
for stock in STOCKS: 
    data_path = os.path.join(current_path, "data", f"{stock}_{START_DATE}_{END_DATE}.csv")
    data = pd.read_csv(data_path)
    data['Date'] = pd.to_datetime(data['Date'])
    if stock != "^IRX":
        data['RSI'] = helper.compute_rsi(data['Close'])
        data['rsi_class'] = helper.compute_rsi_class(data)  # Assuming you have this function in helper
        #data['volume_class'] = helper.compute_volume_class(data)  # Add volume analysis
        data = helper.calculate_mas(data, MAs, column_name="Close")
        data['WVAD'] = helper.calculate_wvad(data, period=14)
        data['ROC'] = helper.calculate_roc(data, period=14)
        data['MACD'], data['macd_line'], data['signal_line'] = helper.calculate_macd(data, short_window=12, long_window=26, signal_window=9)
        data['CCI'] =  helper.calculate_cci(data, period=20)
        data['Upper Band'], data['Lower Band'], data['SMA'] = helper.calculate_bollinger_bands(data, window=20, num_std_dev=2)
        data['SMI'] = helper.calculate_smi(data, period=14, signal_period=3)
        data['ATR'] = helper.calculate_atr(data, period=14)
        data[['WVF', 'upperBand', 'rangeHigh', 'WVF_color']] = helper.cm_williams_vix_fix(data['Close'], data['Low'])
        data[['Buy_Signal', 'Sell_Signal', 'BB_Upper', 'BB_Lower']] = helper.bollinger_rsi_strategy(data['Close'])
        data = helper.on_balance_volume(data)
        data = helper.volume_price_trend(data)
        data = helper.money_flow_index(data)
        data = helper.accumulation_distribution(data)
        data = data.dropna()
    stock_data[stock] = data
    print(f"Data fetched for {stock}")

Data fetched for TSLA
Data fetched for AAPL
Data fetched for QQQ
Data fetched for SPY
Data fetched for MSFT
Data fetched for AMZN
Data fetched for GOOG
Data fetched for DIA
Data fetched for ^IRX


## Processing

In [78]:
stock = 'AAPL'
df_stock = stock_data[stock].copy()
if len(df_stock) %2 != 0:
    df_stock = df_stock[:-1]
close_prices_Y = df_stock['Close']

y_index = df_stock.columns.get_loc('Close')
df_stock_swt = helper.apply_stationary_wavelet_transform(df_stock)

scaler = MinMaxScaler(feature_range=(0, 1))
df_stock_swt = scaler.fit_transform(df_stock_swt)


sequence_length = 200  # days in the input sequence
prediction_length = 5  # days to predict


def create_sequences(input_data, target_data, sequence_length, prediction_length):
    xs, ys = [], []
    for i in range(len(input_data) - sequence_length - prediction_length + 1):
        xs.append(input_data[i:(i + sequence_length)])
        ys.append(target_data[(i + sequence_length):(i + sequence_length + prediction_length)])
    return np.array(xs), np.array(ys)

X, y = create_sequences(df_stock_swt, close_prices_Y, sequence_length, prediction_length)

y = scaler.fit_transform(y)

X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)
train_size = int(0.8 * len(X_tensor))
X_train_tensor = X_tensor[:train_size]
y_train_tensor = y_tensor[:train_size]
X_test_tensor = X_tensor[train_size:]
y_test_tensor = y_tensor[train_size:]
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
batch_size = 32
train_loader = DataLoader(train_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

Date is not numerical or is the target variable
rsi_class is not numerical or is the target variable


# LSTM

In [81]:
class StockPriceLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob, weight_decay):
        super(StockPriceLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob, bidirectional=False, bias=True)
        self.fc = nn.Linear(hidden_size, output_size)  # Output size does not depend on bidirectional
        
        # Add Batch Normalization Layer
        self.batchnorm = nn.BatchNorm1d(hidden_size)
        
        self.weight_decay = weight_decay  # Regularization parameter

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # Output shape: (batch_size, seq_length, hidden_size)
        
        # Apply batch normalization
        out = out.transpose(1, 2)  # Adjust for batch normalization
        out = self.batchnorm(out)
        out = out.transpose(1, 2)  # Adjust back to the original shape
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])  # Output shape: (batch_size, output_size)
        return out

# Training & Testing

In [90]:
# Loss and Optimizer
input_size = X.shape[2]  # Adjust based on your input features
hidden_size = 50
num_layers = 3
output_size = prediction_length
dropout_prob = 0.4
weight_decay = 0.001  
model = StockPriceLSTM(input_size, hidden_size, num_layers, output_size, dropout_prob, weight_decay).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

# Training Loop
num_epochs = 50
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    scheduler.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

model.eval()
test_losses = []
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_losses.append(loss.item())
    

average_loss = np.mean(test_losses)
print(f'Average Test Loss: {average_loss:.4f}')

Epoch [1/50], Loss: 0.0184
Epoch [2/50], Loss: 0.0138
Epoch [3/50], Loss: 0.0118
Epoch [4/50], Loss: 0.0147
Epoch [5/50], Loss: 0.0181
Epoch [6/50], Loss: 0.0183
Epoch [7/50], Loss: 0.0179
Epoch [8/50], Loss: 0.0182
Epoch [9/50], Loss: 0.0183
Epoch [10/50], Loss: 0.0184
Epoch [11/50], Loss: 0.0342
Epoch [12/50], Loss: 0.0304
Epoch [13/50], Loss: 0.0251
Epoch [14/50], Loss: 0.0213
Epoch [15/50], Loss: 0.0196
Epoch [16/50], Loss: 0.0192
Epoch [17/50], Loss: 0.0185
Epoch [18/50], Loss: 0.0189
Epoch [19/50], Loss: 0.0188
Epoch [20/50], Loss: 0.0193
Epoch [21/50], Loss: 0.0939
Epoch [22/50], Loss: 0.0881
Epoch [23/50], Loss: 0.0893
Epoch [24/50], Loss: 0.0895
Epoch [25/50], Loss: 0.0895
Epoch [26/50], Loss: 0.0895
Epoch [27/50], Loss: 0.0895
Epoch [28/50], Loss: 0.0896
Epoch [29/50], Loss: 0.0896
Epoch [30/50], Loss: 0.0896
Epoch [31/50], Loss: 0.0928
Epoch [32/50], Loss: 0.0947
Epoch [33/50], Loss: 0.0954
Epoch [34/50], Loss: 0.0956
Epoch [35/50], Loss: 0.0957
Epoch [36/50], Loss: 0.0957
E

In [57]:
# Making predictions (example on the test set)
model.eval()
with torch.no_grad():
    predictions = []
    for inputs, _ in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        predictions.append(outputs.cpu().numpy())

# Convert predictions to a single array
predictions = np.concatenate(predictions, axis=0)



### All Data

In [94]:
last_sequence.shape

(200, 70)

In [92]:
last_sequence = df_stock_swt[-sequence_length:]
last_sequence = scaler.transform(last_sequence)
last_sequence = torch.tensor(last_sequence, dtype=torch.float32).unsqueeze(0).to(device)

# Making the prediction
model.eval()
with torch.no_grad():
    prediction = model(last_sequence)
    prediction = prediction.cpu().numpy()

actual_prices = close_prices_Y[-sequence_length:].values

dates = pd.to_datetime(df_stock['Date'].iloc[-sequence_length:])
forecast_dates = pd.date_range(start=dates.iloc[-1], periods=len(prediction[0]))  # Adjusted length

plt.figure(figsize=(12, 6))
plt.plot(dates, actual_prices, label='Actual Prices')
plt.plot(forecast_dates, prediction[0], label='Predicted Prices', linestyle='--')
plt.title('Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

ValueError: X has 70 features, but MinMaxScaler is expecting 5 features as input.