In [85]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [86]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [93]:
class StockDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)  # Keep on CPU
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Keep on CPU

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]  # Do NOT move to GPU here


In [96]:
class StockTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim, num_heads, ff_dim, num_layers, dropout=0.1):
        super(StockTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)  # Feature embedding
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, 1)  # Output layer (predict next Close price)

    def forward(self, x):
        x = self.embedding(x)  # Linear projection
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)  # Regression output
        return x

In [92]:
def create_sequences(data, seq_length=30):
    """Converts time series data into sequences for Transformer."""
    sequences, targets = [], []
    
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length]  # Get sequence window
        target = data.iloc[i+seq_length]["close"]  # Predict next close price
        
        sequences.append(seq[features].values)  # Extract numerical features
        targets.append(target)  # Store target
    
    return np.array(sequences), np.array(targets)

In [87]:
# Hyperparameters
SEQ_LEN = 30  # Number of past days used for prediction
PRED_LEN = 1  # Predict the next day's price
D_MODEL = 64  # Transformer model dimension
NHEAD = 4  # Multi-head attention heads
NUM_LAYERS = 3  # Transformer layers
BATCH_SIZE = 32
EPOCHS = 20
LR = 0.001

In [88]:
data_dir = "/kaggle/input/historical-dataset/historical_data"  # Path to your dataset folder
all_data = []

# Load and process each CSV file
for file in os.listdir(data_dir):
    if file.endswith(".csv"):
        file_path = os.path.join(data_dir, file)
        stock_name = file.replace(".csv", "")  # Extract stock ticker
        
        # Load data
        df = pd.read_csv(file_path)
        
        # Add stock name as a new column
        df["Stock"] = stock_name
        
        # Append to list
        all_data.append(df)

# Combine all stock data into a single DataFrame
final_dataset = pd.concat(all_data, ignore_index=True)
final_dataset["timestamp"] = pd.to_datetime(final_dataset["timestamp"])  # Convert to datetime format
final_dataset = final_dataset.sort_values(by=["timestamp"])  # Sort by date

In [89]:
final_dataset = final_dataset.drop(columns=["oi"])
print(final_dataset.head())

                      timestamp     open     high      low    close  volume  \
31211 2025-01-01 09:15:00+05:30   308.70   309.15   307.05   307.25  325458   
17198 2025-01-01 09:15:00+05:30  2280.00  2305.00  2280.00  2299.00  108780   
2547  2025-01-01 09:15:00+05:30   292.30   293.65   291.50   293.15  422483   
11465 2025-01-01 09:15:00+05:30   601.50   603.75   596.50   599.55  572072   
5732  2025-01-01 09:15:00+05:30  1214.85  1217.90  1213.50  1216.20  423841   

            Stock  
31211   POWERGRID  
17198  ASIANPAINT  
2547         BPCL  
11465    HINDALCO  
5732     RELIANCE  


In [90]:
label_encoder = LabelEncoder()
final_dataset["Stock"] = label_encoder.fit_transform(final_dataset["Stock"])
scaler = MinMaxScaler()
features = ["open", "high", "low", "close", "volume"]  # Feature columns
final_dataset[features] = scaler.fit_transform(final_dataset[features])

In [94]:
# Create sequences
seq_length = 30  # Number of past days to look at
X, y = create_sequences(final_dataset, seq_length)

# 🟢 Step 5: Train-Test Split
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Print dataset shapes
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (24945, 30, 5)
X_test shape: (6237, 30, 5)
y_train shape: (24945,)
y_test shape: (6237,)


In [101]:
# 🟢 Convert NumPy Arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Make y 2D
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# 🟢 Create PyTorch DataLoader
batch_size = 32
train_dataset = StockDataset(X_train_tensor, y_train_tensor)
test_dataset = StockDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(
    train_dataset,
    batch_size=256,  # Try increasing batch size
    shuffle=True,
    pin_memory=True,  # Keep CPU tensors ready for fast transfer
    num_workers=4  # Adjust based on Kaggle CPU cores (try 2, 4, or 8)
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory = True)

  self.X = torch.tensor(X, dtype=torch.float32)  # Keep on CPU
  self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Keep on CPU


In [97]:
# 🟢 Initialize Model, Loss, Optimizer
input_dim = X_train.shape[2]  # Number of features
embed_dim = 64
num_heads = 4
ff_dim = 128
num_layers = 3
dropout = 0.1

In [None]:
model = StockTransformer(input_dim, embed_dim, num_heads, ff_dim, num_layers, dropout).to(device)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)  # 🚀 AdamW for better GPU optimization

num_epochs = 50

model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        X_batch, y_batch = X_batch.to(device, non_blocking=True), y_batch.to(device, non_blocking=True)  # Move to GPU here
        predictions = model(X_batch)  # Forward pass
        loss = criterion(predictions, y_batch)
        loss.backward()  # Backpropagation
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.6f}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/50, Loss: 0.055620


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2/50, Loss: 0.044490
Epoch 3/50, Loss: 0.044499
Epoch 4/50, Loss: 0.044595
Epoch 5/50, Loss: 0.044339
Epoch 6/50, Loss: 0.044773
Epoch 7/50, Loss: 0.045343
Epoch 8/50, Loss: 0.045010
Epoch 9/50, Loss: 0.044600
Epoch 10/50, Loss: 0.044637
Epoch 11/50, Loss: 0.044352
Epoch 12/50, Loss: 0.044433
Epoch 13/50, Loss: 0.044476
Epoch 14/50, Loss: 0.044388
Epoch 15/50, Loss: 0.044537


In [None]:
# 🟢 Evaluate the Model on GPU
model.eval()
with torch.no_grad():
    total_loss = 0
    for X_batch, y_batch in test_loader:
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        total_loss += loss.item()

print(f"Test Loss: {total_loss / len(test_loader):.6f}")

Prediction Pipeline

In [None]:
def prepare_input(new_data, stock_encoder, scaler):
    """
    Prepares stock market input data:
    - Encodes stock names numerically
    - Converts timestamps to numerical format
    - Normalizes all features
    - Converts to PyTorch tensor and moves to GPU
    """
    # Convert timestamp to Unix time (seconds since epoch)
    new_data["timestamp"] = pd.to_datetime(new_data["timestamp"]).astype(int) // 10**9  

    # Encode stock name using provided mapping
    new_stock_data["stock"] = new_stock_data["stock"].map(stock_encoder)

    # Select relevant columns for model input
    feature_columns = ["timestamp", "stock", "open", "high", "low", "close", "volume"]
    input_data = new_data[feature_columns].values  # Convert to numpy array

    # Normalize using same scaler as training
    input_data_scaled = scaler.transform(input_data)

    # Convert to PyTorch tensor and move to GPU
    input_tensor = torch.tensor(input_data_scaled, dtype=torch.float32).unsqueeze(0).to("cuda")  
    return input_tensor

def predict_stock_price(model, new_data_tensor):
    """
    Predicts stock price for a single input.
    """
    model.eval()
    with torch.no_grad():
        prediction = model(new_data_tensor)
    return prediction.cpu().numpy()

def inverse_transform_prediction(predicted_price, scaler, feature_index=5):  
    """
    Converts normalized predictions back to actual stock prices.
    - Assumes 'close' price is at index 5 in the scaler
    """
    temp = np.zeros((predicted_price.shape[0], scaler.n_features_in_))  
    temp[:, feature_index] = predicted_price.squeeze()  # Put predictions in the correct column

    actual_price = scaler.inverse_transform(temp)[:, feature_index]  # Extract actual price values
    return actual_price

def predict_future_prices(model, new_data, stock_encoder, scaler, days=10):
    """
    Predicts stock prices for multiple future days using a rolling window.
    """
    future_predictions = []
    input_seq = prepare_input(new_data, stock_encoder, scaler)

    for _ in range(days):
        pred = predict_stock_price(model, input_seq)
        future_predictions.append(pred)

        # Shift window: Remove first step, add new prediction
        pred_tensor = torch.tensor(pred, dtype=torch.float32).unsqueeze(0).to("cuda")
        input_seq = torch.cat([input_seq[:, 1:, :], pred_tensor], dim=1)

    # Convert predictions back to actual stock prices
    actual_predictions = inverse_transform_prediction(np.array(future_predictions), scaler)
    return actual_predictions


In [None]:
stock_encoder = {stock: idx for idx, stock in enumerate(label_encoder.classes_)}

# Example new data
new_stock_data = pd.DataFrame({
    "timestamp": ["2025-01-01 09:15:00+05:30"],
    "stock": ["RELIANCE"],  
    "open": [1214.85], "high": [1217.90], "low": [1213.50], "close": [1216.20], "volume": [423841]
})

new_data_tensor = prepare_input(new_stock_data, stock_encoder, scaler)

# Single-day prediction
predicted_price = predict_stock_price(model, new_data_tensor)
actual_price = inverse_transform_prediction(predicted_price, scaler)
print("Predicted Stock Price:", actual_price)

# Multi-day prediction
future_prices = predict_future_prices(model, new_stock_data, stock_encoder, scaler, days=10)
print("Future Stock Predictions:", future_prices)