In [95]:
# pip install numpy pandas torch scikit-learn
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch.nn.functional as F

In [77]:
# Fourier Transform for the linear transformer encoder
class FourierTransform(nn.Module):
    def __init__(self):
        super(FourierTransform, self).__init__()

    def forward(self, x):
        # Apply the 2D Fourier transform to the last two dimensions
        return torch.fft.fft2(x).real

# Define the Multiplexed Attention mechanism
class MultiplexedAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiplexedAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        
        assert d_model % self.num_heads == 0, "d_model must be divisible by num_heads"
        
        self.depth = d_model // self.num_heads
        
        self.Wq = nn.Linear(d_model, d_model)
        self.Wk = nn.Linear(d_model, d_model)
        self.Wv = nn.Linear(d_model, d_model)
        self.dense = nn.Linear(d_model, d_model)

    def split_heads(self, x, batch_size):
        x = x.view(batch_size, -1, self.num_heads, self.depth)
        return x.permute(0, 2, 1, 3)
    
    def forward(self, q, k, v, mask):
        batch_size = q.size(0)
        
        q = self.split_heads(self.Wq(q), batch_size)
        k = self.split_heads(self.Wk(k), batch_size)
        v = self.split_heads(self.Wv(v), batch_size)
        
        # Scaled dot product attention
        matmul_qk = torch.matmul(q, k.transpose(-2, -1))
        dk = torch.tensor(self.depth).float()
        scaled_attention_logits = matmul_qk / torch.sqrt(dk)
        
        if mask is not None:
            scaled_attention_logits += (mask * -1e9)
        
        attention_weights = F.softmax(scaled_attention_logits, dim=-1)
        output = torch.matmul(attention_weights, v)
        
        output = output.permute(0, 2, 1, 3).contiguous()
        output = output.view(batch_size, -1, self.d_model)
        
        return self.dense(output)

# Define the Positionwise Feed-Forward Network
class PositionwiseFeedforward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedforward, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        return self.linear2(self.dropout(F.relu(self.linear1(x))))

# Define the Linear Transformer Model
class LinearTransformer(nn.Module):
    def __init__(self, feature_size, num_layers, d_model, num_heads, d_ff, dropout=0.1):
        super(LinearTransformer, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.fourier_transform = FourierTransform()
        self.positional_encoder = nn.Embedding(1000, d_model)  # Customize based on max sequence length
        self.layers = nn.ModuleList([PositionwiseFeedforward(d_model, d_ff, dropout) for _ in range(num_layers)])
        self.multiplexed_attn = MultiplexedAttention(d_model, num_heads)
        self.output_layer = nn.Linear(d_model, feature_size)  # Adjust depending on your output size

    def forward(self, src, src_mask=None):
        batch_size, seq_length, _ = src.size()

        # Positional Encoding
        positions = torch.arange(seq_length, device=src.device).unsqueeze(0).repeat(batch_size, 1)
        src = src + self.positional_encoder(positions)

        # Fourier Transform
        src = self.fourier_transform(src)

        # Positionwise Feed-Forward Networks
        for layer in self.layers:
            src = layer(src)

        # Multiplexed Attention Decoder
        output = self.multiplexed_attn(src, src, src, src_mask)  # Q=K=V for self-attention

        # Output layer to convert back to feature size
        output = self.output_layer(output)
        return output

# Model Parameters
feature_size = 1  # Assuming we are working with a single feature, like 'Close' price
num_layers = 2
d_model = 512
num_heads = 8
d_ff = 2048
dropout = 0.1

# Instantiate the model
model = LinearTransformer(feature_size, num_layers, d_model, num_heads, d_ff, dropout)

# Dummy input for testing the model
dummy_input = torch.rand(32, 10, feature_size)  # batch_size, seq_length, feature_size
output = model(dummy_input)

print(output.shape)  # Expected output shape: (32, 10, feature_size)


torch.Size([32, 10, 1])


In [78]:
# Load your dataset
df = pd.read_csv('TSLA.csv')  # Make sure to replace this with your actual file path


In [79]:
df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
df.dropna(inplace=True)  # Remove NaNs

In [80]:
df['log_return']

1     -0.011308
2     -0.037349
3     -0.002483
4     -0.002976
5      0.012281
         ...   
245    0.010482
246    0.028777
247    0.012084
248   -0.022722
249   -0.003248
Name: log_return, Length: 249, dtype: float64

In [81]:
scaler = MinMaxScaler(feature_range=(-1, 1))
df['log_return'] = scaler.fit_transform(df['log_return'].values.reshape(-1,1))

In [82]:
# Convert the DataFrame to a PyTorch Tensor
data = torch.FloatTensor(df['log_return'].values).view(-1)

In [83]:
# Define a function to create sequences
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [84]:
# Define your sequence length
seq_length = 10  # Based on how many days you want to use to predict the next day
# Create sequences
inout_seq = create_inout_sequences(data, seq_length)

In [85]:
# Split your data into train and test sets
train_size = int(len(inout_seq) * 0.80)
train_set = inout_seq[:train_size]
test_set = inout_seq[train_size:]

# Prepare DataLoader
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [86]:
# Instantiate the model, loss function, and optimizer
model = LinearTransformer(feature_size=1, num_layers=2, d_model=64, d_ff = 2048, num_heads=8)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [87]:
for i, item in enumerate(train_loader):
    print(i, item)
    if i > 5:
        break

0 [tensor([[-3.3746e-02,  6.5838e-01,  1.0908e-01,  1.6324e-01,  1.1754e-01,
          2.8083e-01,  1.2565e-01,  6.7744e-03, -1.2281e-01,  2.4526e-01],
        [-2.4374e-01,  5.1095e-01,  3.7219e-01,  1.1934e-02,  6.3143e-02,
          1.6346e-01,  1.7941e-01,  6.2162e-01,  2.3668e-01,  9.4636e-03],
        [ 5.0649e-01,  2.6822e-01,  3.0171e-01,  4.1845e-01,  2.9655e-01,
          2.9647e-01,  2.7662e-01,  5.4439e-01,  5.0016e-01,  3.4185e-01],
        [ 5.5576e-01,  5.0649e-01,  2.6822e-01,  3.0171e-01,  4.1845e-01,
          2.9655e-01,  2.9647e-01,  2.7662e-01,  5.4439e-01,  5.0016e-01],
        [-3.5823e-03,  4.5258e-02,  2.3182e-01,  5.7255e-01,  2.3304e-01,
          9.7034e-02,  3.2597e-01, -2.0783e-01,  4.0725e-01,  7.8278e-02],
        [-5.3927e-03,  7.7431e-01,  2.1985e-01,  2.8543e-01, -1.1235e-01,
          4.7067e-01,  1.5542e-01,  8.0404e-01,  1.3721e-01,  1.8754e-01],
        [-1.3798e-01, -1.9385e-02,  1.4473e-01, -2.1656e-01,  1.2750e-01,
          1.3045e-01,  2.5678

In [88]:
for seq, labels in train_loader:
    optimizer.zero_grad()
    y_pred = model(seq.unsqueeze(-1))  # Add an extra dimension for num_features
    labels = labels.view(-1)  # Reshape labels to be 1D
    single_loss = criterion(y_pred[:, -1], labels)  # Use the last value of each sequence for prediction
    single_loss.backward()
    optimizer.step()



  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [89]:

# Validate the model
with torch.no_grad():
    for seq, labels in test_loader:
        y_test_pred = model(seq.unsqueeze(-1))  # Add an extra dimension for num_features
        test_loss = criterion(y_test_pred[:, -1], labels)  # Use the last value of each sequence for prediction

print(f'Test loss: {test_loss.item()}')


Test loss: 0.7621733546257019


In [90]:
# Make predictions
model.eval()
predictions = []
with torch.no_grad():
    for seq, labels in test_loader:
        seq = seq.view(-1, 10, 1)  # Reshape your sequence data if necessary
        y_pred_test = model(seq)
        # Extract the last value of each sequence and reshape to 2D array
        y_pred_test_last = y_pred_test[:, -1].numpy().reshape(-1, 1)
        # Inverse transform the predictions
        y_pred_test_inv = scaler.inverse_transform(y_pred_test_last)
        predictions.extend(y_pred_test_inv)

# Convert the list of predictions to a numpy array
predictions = np.array(predictions)
print(predictions)

# To inverse transform the scaling on a single prediction value (e.g., the last prediction):
predicted_value = scaler.inverse_transform(predictions[-1].reshape(-1, 1))
print(f"Predicted Value: {predicted_value}")



[[-0.11423748]
 [-0.09807914]
 [-0.10716513]
 [-0.1321582 ]
 [-0.08726153]
 [-0.09321956]
 [-0.12088523]
 [-0.09219545]
 [-0.11479194]
 [-0.09588676]
 [-0.09982554]
 [-0.12001492]
 [-0.10131675]
 [-0.10309248]
 [-0.1009479 ]
 [-0.12503815]
 [-0.10166435]
 [-0.09513269]
 [-0.11008015]
 [-0.10532748]
 [-0.11566451]
 [-0.09872688]
 [-0.0985194 ]
 [-0.11570175]
 [-0.10483246]
 [-0.10976406]
 [-0.09943962]
 [-0.09843545]
 [-0.12224382]
 [-0.11112357]
 [-0.10485278]
 [-0.1032197 ]
 [-0.10052475]
 [-0.10509852]
 [-0.10579433]
 [-0.09865479]
 [-0.10583702]
 [-0.10599229]
 [-0.10362068]
 [-0.09971827]
 [-0.10687425]
 [-0.10904612]
 [-0.10176252]
 [-0.10950609]
 [-0.0979264 ]
 [-0.11734486]
 [-0.10558103]
 [-0.10523278]]
Predicted Value: [[-0.02841371]]


In [91]:
print(len(predictions))


predictions[0]

48


array([-0.11423748], dtype=float32)

In [93]:
# Assuming 'predicted_log_returns' is a numpy array containing your log return predictions
# and 'last_known_price' is the last known price from your test data
last_known_price = df['Close'].iloc[-1]
predicted_prices = [last_known_price]


predicted_price = predicted_prices[-1] * np.exp(predictions[0])
predicted_prices.append(predicted_price)

# The predicted price for the next time step after the last sequence in your test data
next_predicted_price = predicted_prices[-1]
print(f"Predicted price for the next time step: {next_predicted_price}")




Predicted price for the next time step: [156.30432]
