In [1]:
# pip install numpy pandas torch scikit-learn
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch.nn.functional as F

In [33]:
# Fourier Transform for the linear transformer encoder
class FourierTransform(nn.Module):
    def __init__(self):
        super(FourierTransform, self).__init__()

    def forward(self, x):
        # Apply the 2D Fourier transform to the last two dimensions
        return torch.fft.fft2(x).real

# Define the Multiplexed Attention mechanism

# Define the Positionwise Feed-Forward Network
class PositionwiseFeedforward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedforward, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        return self.linear2(self.dropout(F.relu(self.linear1(x))))

#Encoder Layer
class TransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(TransformerEncoderLayer, self).__init__()
        self.fourier_transform = FourierTransform()
        self.feed_forward = PositionwiseFeedforward(d_model, d_ff, dropout)
        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask=None):
        # Applying Fourier Transform as a replacement for MHA
        src2 = self.fourier_transform(src)
        src = self.layernorm1(src + self.dropout(src2))

        # Positionwise Feedforward Network
        src2 = self.feed_forward(src)
        src = self.layernorm2(src + self.dropout(src2))

        return src

#Decoder Layer
class TransformerDecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout)
        self.multihead_attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout)
        self.feed_forward = PositionwiseFeedforward(d_model, d_ff, dropout)
        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)
        self.layernorm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask, memory_mask):
        # Self-attention on the targets
        tgt2 = self.layernorm1(tgt)
        tgt2 = self.self_attn(tgt2, tgt2, tgt2, attn_mask=tgt_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        # Attention over encoder's output
        tgt2 = self.layernorm2(tgt)
        tgt2 = self.multihead_attn(tgt2, memory, memory, attn_mask=memory_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        # Positionwise feedforward
        tgt2 = self.layernorm3(tgt)
        tgt = tgt + self.dropout(self.feed_forward(tgt2))
        
        return tgt
    
class LinearTransformer(nn.Module):
    def __init__(self, feature_size, num_layers, d_model, num_heads, d_ff, dropout=0.1):
        super(LinearTransformer, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.fourier_transform = FourierTransform()  # Fourier transform layer
        self.positional_encoder = nn.Embedding(1000, d_model)  # Customize based on max sequence length
        self.encoder_layers = nn.ModuleList([TransformerEncoderLayer(d_model, d_ff, dropout) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([TransformerDecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.output_layer = nn.Linear(d_model, feature_size)  # Adjust depending on your output size

    def forward(self, src, src_mask=None, tgt=None, tgt_mask=None):
        batch_size, seq_length, _ = src.size()

        # Positional Encoding
        positions = torch.arange(seq_length, device=src.device).unsqueeze(0).repeat(batch_size, 1)
        src = src + self.positional_encoder(positions)

        # Fourier Transform
        src = self.fourier_transform(src)

        # Encoder
        for layer in self.encoder_layers:
            src = layer(src, src_mask=src_mask)

        if tgt is not None:
            # If there is a target sequence (for tasks that use the decoder)
            tgt = tgt + self.positional_encoder(positions[:tgt.size(1), :])
            for layer in self.decoder_layers:
                tgt = layer(tgt, src, tgt_mask=tgt_mask)

            # Output layer for decoder output
            return self.output_layer(tgt)

        # Output layer to convert encoder output back to feature size (if only using the encoder)
        return self.output_layer(src)


In [13]:
# Load your dataset
df = pd.read_csv('/Users/oli/Documents/GitHub/Linear_Trans/stock_data/TSLA.csv') 

In [34]:
df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
df.dropna(inplace=True)  # Remove NaNs

In [35]:
df['log_return']

2     -0.037349
3     -0.002483
4     -0.002976
5      0.012281
6     -0.034033
         ...   
245    0.010482
246    0.028777
247    0.012084
248   -0.022722
249   -0.003248
Name: log_return, Length: 248, dtype: float64

In [36]:
scaler = MinMaxScaler(feature_range=(-1, 1))
df['log_return'] = scaler.fit_transform(df['log_return'].values.reshape(-1,1))

In [37]:
# Convert the DataFrame to a PyTorch Tensor
data = torch.FloatTensor(df['log_return'].values).view(-1)

In [38]:
# Define a function to create sequences
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [39]:
# Define your sequence length
seq_length = 30  # Based on how many days you want to use to predict the next day
# Create sequences
inout_seq = create_inout_sequences(data, seq_length)

In [40]:
# Split your data into train and test sets
train_size = int(len(inout_seq) * 0.80)
train_set = inout_seq[:train_size]
test_set = inout_seq[train_size:]

# Prepare DataLoader
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [51]:
# Instantiate the model, loss function, and optimizer
model = LinearTransformer(feature_size=1, num_layers=2, d_model=64, d_ff = 2048, num_heads=8)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [52]:
for seq, labels in train_loader:
    optimizer.zero_grad()
    y_pred = model(seq.unsqueeze(-1))  # Add an extra dimension for num_features
    labels = labels.view(-1)  # Reshape labels to be 1D
    single_loss = criterion(y_pred[:, -1], labels)  # Use the last value of each sequence for prediction
    single_loss.backward()
    optimizer.step()

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [61]:
# Validate the model
with torch.no_grad():
    for seq, labels in test_loader:
        y_test_pred = model(seq.unsqueeze(-1))  # Add an extra dimension for num_features
        test_loss = criterion(y_test_pred[:, -1], labels)  # Use the last value of each sequence for prediction

print(f'Test loss: {test_loss.item()}')

Test loss: 0.11377670615911484


In [62]:
# Make predictions
model.eval()
predictions = []
with torch.no_grad():
    for seq, labels in test_loader:
        seq = seq.view(-1, 10, 1)  # Reshape your sequence data if necessary
        y_pred_test = model(seq)
        # Extract the last value of each sequence and reshape to 2D array
        y_pred_test_last = y_pred_test[:, -1].numpy().reshape(-1, 1)
        # Inverse transform the predictions
        y_pred_test_inv = scaler.inverse_transform(y_pred_test_last)
        predictions.extend(y_pred_test_inv)

# Convert the list of predictions to a numpy array
predictions = np.array(predictions)
print(predictions)

# To inverse transform the scaling on a single prediction value (e.g., the last prediction):
predicted_value = scaler.inverse_transform(predictions[-1].reshape(-1, 1))
print(f"Predicted Value: {predicted_value}")



[[-0.00794637]
 [-0.00792486]
 [-0.00783756]
 [-0.00786428]
 [-0.0078784 ]
 [-0.00777479]
 [-0.00790733]
 [-0.0078891 ]
 [-0.00794543]
 [-0.00787022]
 [-0.00795936]
 [-0.00807587]
 [-0.00789444]
 [-0.00791571]
 [-0.00787331]
 [-0.00797375]
 [-0.00788428]
 [-0.0077936 ]
 [-0.00787677]
 [-0.00784591]
 [-0.00783367]
 [-0.00789612]
 [-0.00789691]
 [-0.00798001]
 [-0.00789414]
 [-0.00790479]
 [-0.00813892]
 [-0.00786941]
 [-0.00796539]
 [-0.00791902]
 [-0.00792486]
 [-0.00783756]
 [-0.00790253]
 [-0.0078784 ]
 [-0.00777479]
 [-0.00793739]
 [-0.0078891 ]
 [-0.00794543]
 [-0.00791705]
 [-0.00795936]
 [-0.00807587]
 [-0.00785621]
 [-0.00791571]
 [-0.00787331]
 [-0.00794433]
 [-0.00788428]
 [-0.0077936 ]
 [-0.00809522]
 [-0.00784591]
 [-0.00783367]
 [-0.00799772]
 [-0.00789691]
 [-0.00798001]
 [-0.00786001]
 [-0.00790479]
 [-0.00813892]
 [-0.00780191]
 [-0.00796539]
 [-0.00791902]
 [-0.00793978]
 [-0.00783756]
 [-0.00790253]
 [-0.0080568 ]
 [-0.00777479]
 [-0.00793739]
 [-0.00800719]
 [-0.00794

In [63]:
# Assuming 'predicted_log_returns' is a numpy array containing your log return predictions
# and 'last_known_price' is the last known price from your test data
last_known_price = df['Close'].iloc[-1]
predicted_prices = [last_known_price]


predicted_price = predicted_prices[-1] * np.exp(predictions[0])
predicted_prices.append(predicted_price)

# The predicted price for the next time step after the last sequence in your test data
next_predicted_price = predicted_prices[-1]
print(f"Predicted price for the next time step: {next_predicted_price}")




Predicted price for the next time step: [173.83315]
