In [1]:
pip install yfinance

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

stock_symbol = "RELIANCE.NS"

from datetime import datetime, timedelta

end_date = datetime.now()
start_date = end_date - timedelta(days=50)  # 2 months (roughly 60 days)

# Convert to strings in the format yfinance expects
start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')

data = yf.download(stock_symbol, start=start_date_str, end=end_date_str, interval="30m")

print(data.shape)


[*********************100%***********************]  1 of 1 completed

(409, 5)





In [3]:
df = data[['Open', 'High', 'Low', 'Close', 'Volume']]
print(df.shape)
print(type(df.values))

(409, 5)
<class 'numpy.ndarray'>


In [4]:
import math
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [5]:
class stock_data(Dataset):
    def __init__(self, data, sequence_length = 15):
        self.data = data
        self.sequence_length = sequence_length
        self.scaler = MinMaxScaler(feature_range=(0,1))
        self.scaled_data = self.scaler.fit_transform(self.data)
    
    def __len__(self):
        return len(self.scaled_data) - self.sequence_length

    def __getitem__(self, idx):
        x = self.scaled_data[idx : idx + sequence_length]
        y = self.scaled_data[idx+sequence_length, 3]

        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


In [6]:
sequence_length = 15
batch_size = 64

dataset = stock_data(df.values, sequence_length=sequence_length)
print(df.shape)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, drop_last=True)

(409, 5)


In [7]:
class CustomLSTM(nn.Module):
    def __init__(self, input_sz, hidden_sz, peephole=False, dropout_prob = 0.1):
        super().__init__()
        self.input_sz = input_sz
        self.hidden_size = hidden_sz
        self.peephole = peephole
        self.U = nn.Parameter(torch.Tensor(input_sz, hidden_sz * 4))
        self.W = nn.Parameter(torch.Tensor(hidden_sz, hidden_sz * 4))
        self.dropout_prob = dropout_prob
        self.bias = nn.Parameter(torch.Tensor(hidden_sz * 4))
        self.init_weights()
        self.dropout = nn.Dropout(dropout_prob)

    def init_weights(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, x, init_states=None):
        bs, seq_sz, _ = x.size()
        hidden_seq = []
        if init_states is None:
            h_t, c_t = (torch.zeros(bs, self.hidden_size).to(x.device), 
                        torch.zeros(bs, self.hidden_size).to(x.device))
        else:
            h_t, c_t = init_states

#h_t --> hidden state(previous output)
#c_t --> cell state(aggregated data)

        HS = self.hidden_size
        for t in range(seq_sz):
            x_t = x[:, t, :]
            if self.peephole:
                gates = x_t @ self.U + c_t @ self.W + self.bias
            else:
                gates = x_t @ self.U + h_t @ self.W + self.bias
                g_t = torch.tanh(gates[:, HS*2:HS*3])

            i_t, f_t, o_t = (
                torch.sigmoid(gates[:, :HS]),
                torch.sigmoid(gates[:, HS:HS*2]),
                torch.sigmoid(gates[:, HS*3:])
            )

            if self.peephole:
                c_t = f_t * c_t + i_t * torch.sigmoid(x_t @ self.U + self.bias)[:, HS*2:HS*3]
                h_t = torch.tanh(o_t * c_t)
            else:
                c_t = f_t * c_t + i_t * g_t
                h_t = o_t * torch.tanh(c_t)

            h_t = self.dropout(h_t)

            hidden_seq.append(h_t.unsqueeze(0))

        hidden_seq = torch.cat(hidden_seq, dim=0)
        hidden_seq = hidden_seq.transpose(0, 1).contiguous()

        return hidden_seq, (h_t, c_t)

input_size = 5  # 'Open', 'High', 'Low', 'Close', 'Volume'

In [8]:
for batch_idx , (X_batch,y_batch) in enumerate(train_loader):
    print(f"Batch {batch_idx+1} - X_batch shape: {X_batch.shape}, y_batch shape: {y_batch.shape}")

Batch 1 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])
Batch 2 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])
Batch 3 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])
Batch 4 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])
Batch 5 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])
Batch 6 - X_batch shape: torch.Size([64, 15, 5]), y_batch shape: torch.Size([64])


In [9]:
import torch.optim as optim
import torch.nn as nn

input_size = 5
hidden_size = 64
model = CustomLSTM(input_size, hidden_size)

loss_function = nn.MSELoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20  # Define how many epochs you want
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        
        # Forward pass
        outputs, _ = model(X_batch)  # Get the predictions
        loss = loss_function(outputs[:, -1, 3], y_batch) 

        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Print average loss for the epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(train_loader)}')

Epoch 1/20, Loss: 0.23145272334416708
Epoch 2/20, Loss: 0.18804974357287088
Epoch 3/20, Loss: 0.13876344139377275
Epoch 4/20, Loss: 0.07684263649086158
Epoch 5/20, Loss: 0.04850631766021252
Epoch 6/20, Loss: 0.04771097004413605
Epoch 7/20, Loss: 0.03629606279234091
Epoch 8/20, Loss: 0.03189245549341043
Epoch 9/20, Loss: 0.023778514315684635
Epoch 10/20, Loss: 0.02826595880712072
Epoch 11/20, Loss: 0.024193871145447094
Epoch 12/20, Loss: 0.026873940601944923
Epoch 13/20, Loss: 0.026567194300393265
Epoch 14/20, Loss: 0.026918066510309775
Epoch 15/20, Loss: 0.02836640893171231
Epoch 16/20, Loss: 0.025714433752000332
Epoch 17/20, Loss: 0.02536874481787284
Epoch 18/20, Loss: 0.027646489907056093
Epoch 19/20, Loss: 0.02575729616607229
Epoch 20/20, Loss: 0.03284022118896246


In [10]:
device = torch.device('cpu')

model.eval()
with torch.no_grad():
    test_input = X_batch[-1:]  # Use the last batch as a test input
    test_output = y_batch[-1]
    print('Actual Close : ',test_output)
    prediction, _ = model(test_input)
    predicted_close = prediction[:, -1, 3].numpy()  # Get the predicted 'Close' price
    print(f"Predicted Close: {predicted_close}")

Actual Close :  tensor(0.4114)
Predicted Close: [0.35437888]


In [11]:
data = yf.download(stock_symbol, period="5d", interval="30m")

scaler = MinMaxScaler(feature_range = (0,1))

# Get the last 15 data points
last_15_data = data.tail(15)

pred_input = last_15_data.loc[:,['Open', 'High', 'Low', 'Close', 'Volume']]
scaled_input = scaler.fit_transform(pred_input)



pred_tensor = torch.tensor(scaled_input , dtype=torch.float32)

with torch.no_grad():
    pred_output, _ = model(pred_tensor.unsqueeze(0))
    print("shape : ", pred_output.shape)

    normalized_pred = pred_output[:, -1, 3].numpy()
    
    # Create a dummy array with same shape as original input
    dummy_array = np.zeros((1, 5))  # 5 features: Open, High, Low, Close, Volume
    dummy_array[0, 3] = normalized_pred[0]  # Put prediction in Close price position
    
    # Inverse transform to get actual price
    actual_price = scaler.inverse_transform(dummy_array)[0, 3]

    print(f"Normalized Prediction: {normalized_pred[0]:.4f}")
    print(f"Predicted Close Price: ₹{actual_price:.2f}")
    
    # Optional: Print actual last close price for comparison
    actual_close = float(data['Close'].iloc[-1])
    print(f"Last Actual Close Price: ₹{actual_close:.2f}")
    print(f"Prediction Error: ₹{abs(actual_close - actual_price):.2f}")




[*********************100%***********************]  1 of 1 completed

shape :  torch.Size([1, 15, 64])
Normalized Prediction: 0.6983
Predicted Close Price: ₹1260.65
Last Actual Close Price: ₹1264.60
Prediction Error: ₹3.95



  actual_close = float(data['Close'].iloc[-1])


In [12]:
import joblib

# Save the model, optimizer state, and training info
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': num_epochs,
    'loss': epoch_loss,
    'input_size': input_size,
    'hidden_size': hidden_size
}, 'lstm_stock_model.pth')

# Save the scaler separately since it's not a PyTorch object
joblib.dump(dataset.scaler, 'lstm_stock_scaler.pkl')




['lstm_stock_scaler.pkl']

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "c:\Users\GIDEON\anaconda\Lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\GIDEON\anaconda\Lib\asyncio\selector_events.py", line 132, in _read_from_self
    data = self._ssock.recv(4096)
           ^^^^^^^^^^^^^^^^^^^^^^
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "c:\Users\GIDEON\anaconda\Lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\GIDEON\anaconda\Lib\asyncio\selector_events.py", line 132, in _read_from_self
    data = self._ssock.recv(4096)
           ^^^^^^^^^^^^^^^^^^^^^^
ConnectionReset