## Loading

In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
import helper 
from models_stationary import *
import pywt

current_path = os.getcwd()

random_state = helper.RANDOM_STATE

# Define a context manager to temporarily suppress FutureWarnings
class SuppressFutureWarnings:
    def __enter__(self):
        warnings.filterwarnings('ignore', category=FutureWarning)
    
    def __exit__(self, exc_type, exc_value, traceback):
        warnings.filterwarnings('default')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [11]:
STOCKS = ["TSLA", "AAPL", 'QQQ', "SPY", "MSFT", "AMZN", "GOOG", "DIA", "^IRX"]
START_DATE = helper.START_DATE
END_DATE = helper.END_DATE
stock_data = {}
MAs = [5, 10, 20, 50, 100, 200]
for stock in STOCKS: 
    data_path = os.path.join(current_path, "data", f"{stock}_{START_DATE}_{END_DATE}.csv")
    data = pd.read_csv(data_path)
    data['Date'] = pd.to_datetime(data['Date'])
    if stock != "^IRX":
        data['RSI'] = helper.compute_rsi(data['Close'])
        data['rsi_class'] = helper.compute_rsi_class(data)  # Assuming you have this function in helper
        #data['volume_class'] = helper.compute_volume_class(data)  # Add volume analysis
        data = helper.calculate_mas(data, MAs, column_name="Close")
        data['WVAD'] = helper.calculate_wvad(data, period=14)
        data['ROC'] = helper.calculate_roc(data, period=14)
        data['MACD'], data['macd_line'], data['signal_line'] = helper.calculate_macd(data, short_window=12, long_window=26, signal_window=9)
        data['CCI'] =  helper.calculate_cci(data, period=20)
        data['Upper Band'], data['Lower Band'], data['SMA'] = helper.calculate_bollinger_bands(data, window=20, num_std_dev=2)
        data['SMI'] = helper.calculate_smi(data, period=14, signal_period=3)
        data['ATR'] = helper.calculate_atr(data, period=14)
        data[['WVF', 'upperBand', 'rangeHigh', 'WVF_color']] = helper.cm_williams_vix_fix(data['Close'], data['Low'])
        data[['Buy_Signal', 'Sell_Signal', 'BB_Upper', 'BB_Lower']] = helper.bollinger_rsi_strategy(data['Close'])
        data = helper.on_balance_volume(data)
        data = helper.volume_price_trend(data)
        data = helper.money_flow_index(data)
        data = helper.accumulation_distribution(data)
        data = data.dropna()
    stock_data[stock] = data
    print(f"Data fetched for {stock}")

Data fetched for TSLA
Data fetched for AAPL
Data fetched for QQQ
Data fetched for SPY
Data fetched for MSFT
Data fetched for AMZN
Data fetched for GOOG
Data fetched for DIA
Data fetched for ^IRX


In [12]:
apple_data = stock_data['AAPL']
def apply_wavelet_transform(data, wavelet='haar'):
    transformed_data = pd.DataFrame(index=data.index)
    for column in data.columns:
        if pd.api.types.is_numeric_dtype(data[column]):
            cA, _ = pywt.dwt(data[column].fillna(0), wavelet)
            # Pad the transformed data with zeros to match the original length
            padding = np.zeros(len(data) - len(cA))
            cA_padded = np.concatenate([cA, padding])
            transformed_data[column] = cA_padded
        else:
            transformed_data[column] = data[column]
    return transformed_data

apple_data = apply_wavelet_transform(apple_data)

scaler = MinMaxScaler(feature_range=(0, 1))
apple_data = scaler.fit_transform(apple_data.select_dtypes(include=np.number))


def create_sequences(data, sequence_length, prediction_length):
    xs, ys = [], []
    for i in range(len(data) - sequence_length - prediction_length + 1):
        x = data[i:(i+sequence_length)]
        y = data[(i+sequence_length):(i+sequence_length+prediction_length)]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

sequence_length = 50  # Number of days in the input sequence
prediction_length = 5  # Number of days to predict
X, y = create_sequences(apple_data, sequence_length, prediction_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [13]:
class StackedAutoEncoder(nn.Module):
    def __init__(self, input_size):
        super(StackedAutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 32))
        
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, input_size),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


In [14]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_layer_size, prediction_length, feature_dim):
        super(LSTM, self).__init__()
        self.feature_dim = feature_dim
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, prediction_length * feature_dim)

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)
        lstm_out = lstm_out[:, -1, :]
        predictions = self.linear(lstm_out)
        predictions = predictions.view(-1, prediction_length, self.feature_dim)
        return predictions


In [15]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoaders
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

test_data = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


In [16]:
# Initialize models
num_epochs = 10
sae = StackedAutoEncoder(input_size=X_train.shape[2]).to(device)
lstm = LSTM(input_size=32, hidden_layer_size=50, prediction_length=5, feature_dim=y_train.shape[2]).to(device)


# Loss function and optimizer
criterion = nn.MSELoss()
optimizer_sae = torch.optim.Adam(sae.parameters(), lr=0.001)
optimizer_lstm = torch.optim.Adam(lstm.parameters(), lr=0.001)


# Training loop for SAE
for epoch in range(num_epochs):
    total_loss = 0
    for inputs, _ in train_loader:
        inputs = inputs.to(device)
        optimizer_sae.zero_grad()
        outputs = sae(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer_sae.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader)}")


Epoch 1/10, Loss: 0.08243615325631165
Epoch 2/10, Loss: 0.01793174379755084
Epoch 3/10, Loss: 0.008028478656963604
Epoch 4/10, Loss: 0.007150623038774583
Epoch 5/10, Loss: 0.006497936323285103
Epoch 6/10, Loss: 0.005642410572163943
Epoch 7/10, Loss: 0.0045592204508621515
Epoch 8/10, Loss: 0.004046449909077548
Epoch 9/10, Loss: 0.003894655601825656
Epoch 10/10, Loss: 0.003784273228630787


In [17]:
# Extract features for LSTM
X_train_encoded = []
for inputs, _ in train_loader:
    inputs = inputs.to(device)
    with torch.no_grad():
        encoded = sae.encoder(inputs)
        X_train_encoded.append(encoded)

# Flatten the encoded features to maintain sequence structure
X_train_encoded_flat = torch.cat(X_train_encoded, dim=0)

# Create new DataLoader for LSTM training
train_encoded_data = TensorDataset(X_train_encoded_flat, y_train_tensor)
train_encoded_loader = DataLoader(train_encoded_data, batch_size=64, shuffle=True)

# Training loop for LSTM
for epoch in range(num_epochs):
    total_loss = 0
    for inputs, targets in train_encoded_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer_lstm.zero_grad()
        predictions = lstm(inputs)
        loss = criterion(predictions, targets)
        loss.backward()
        optimizer_lstm.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_encoded_loader)}")

Epoch 1/10, Loss: 0.0860627170016126
Epoch 2/10, Loss: 0.053425830311891506
Epoch 3/10, Loss: 0.053018490170560234
Epoch 4/10, Loss: 0.052938386252740534
Epoch 5/10, Loss: 0.05301993167618426
Epoch 6/10, Loss: 0.05293284365680159
Epoch 7/10, Loss: 0.052833615007196987
Epoch 8/10, Loss: 0.05299926503765874
Epoch 9/10, Loss: 0.05300334077782747
Epoch 10/10, Loss: 0.052771826615420785


In [18]:
def encode_and_predict(data_loader, sae, lstm):
    encoded_features = []
    predictions = []
    with torch.no_grad():
        for inputs in data_loader:
            inputs = inputs[0].to(device)  # Assuming the data_loader yields only features
            encoded = sae.encoder(inputs)
            lstm_out = lstm(encoded)
            encoded_features.append(encoded.cpu().numpy())
            predictions.append(lstm_out.cpu().numpy())
    return np.concatenate(encoded_features), np.concatenate(predictions)

# Prepare data loaders for the test set and the entire dataset
test_encoded_loader = DataLoader(test_data, batch_size=64, shuffle=False)
all_data_loader = DataLoader(TensorDataset(torch.tensor(X, dtype=torch.float32)), batch_size=64, shuffle=False)

# Encode and predict for the test set and the entire dataset
_, test_predictions = encode_and_predict(test_encoded_loader, sae, lstm)
_, all_predictions = encode_and_predict(all_data_loader, sae, lstm)

test_mse = np.mean((y_test_tensor.cpu().numpy() - test_predictions) ** 2)
print(f"Test Mean Squared Error: {test_mse}")

all_mse = np.mean((y - all_predictions) ** 2)
print(f"Entire Dataset Mean Squared Error: {all_mse}")


Test Mean Squared Error: 0.03452277183532715
Entire Dataset Mean Squared Error: 0.04800975605637996


In [28]:
def inverse_transform(predictions, scaler, sequence_length):
    num_samples, sequence_length, feature_dim = predictions.shape
    reshaped_predictions = predictions.reshape(num_samples * sequence_length, feature_dim)
    return scaler.inverse_transform(reshaped_predictions)


def inverse_wavelet_transform(predictions, sequence_length, wavelet='haar'):
    num_samples, feature_dim = predictions.shape
    reshaped_predictions = predictions.reshape(num_samples * sequence_length, feature_dim)
    
    inverted_data = pd.DataFrame(index=range(len(reshaped_predictions)))
    for i in range(feature_dim):
        inverted_data[i] = pywt.idwt(reshaped_predictions[:, i], None, wavelet)
    return inverted_data




original_test_predictions = inverse_transform(test_predictions, scaler, sequence_length)
original_all_predictions = inverse_transform(all_predictions, scaler, sequence_length)


original_test_predictions = inverse_wavelet_transform(original_test_predictions)
original_all_predictions = inverse_wavelet_transform(original_all_predictions)


TypeError: inverse_wavelet_transform() missing 1 required positional argument: 'sequence_length'