RNN and LSTM 

resources:

https://colah.github.io/posts/2015-08-Understanding-LSTMs/

https://stanford.edu/~shervine/teaching/cs-229/cheatsheet-deep-learning#nn


In [2]:
from sklearn import model_selection
import yfinance as yf
import numpy as np
import pandas as pd
import torch
from torch import nn 
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Fetch the price history for one ticker over a fixed date range and keep
# only the 'Close' column. The double brackets select a list of columns, so
# the result is expected to stay two-dimensional (a DataFrame, not a Series).
ticker = 'BAP'
df = yf.download(ticker, start='2025-01-01', end='2025-06-18')[['Close']]
df

In [8]:
# Number of consecutive rows that make up one input window.
sequence_length = 10

# Rescale the closing prices with a MinMaxScaler (default settings).
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split performed later, so the min/max of the test period leak
# into the training data. Consider fitting on the training portion only.
scaler = MinMaxScaler()
scaled = scaler.fit(df).transform(df)

scaled

def create_sequence(data, sequence_length):
    """Slice a series into overlapping windows and their next-step targets.

    Window ``i`` is ``data[i:i + sequence_length]`` and its target is the
    element that immediately follows it, ``data[i + sequence_length]``.

    Args:
        data: Array-like indexed along its first axis; converted with
            ``np.asarray``.
        sequence_length: Number of consecutive elements per window. Must be
            non-negative.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where
        ``n = max(len(data) - sequence_length, 0)``, ``X`` has shape
        ``(n, sequence_length, *data.shape[1:])`` and ``y`` has shape
        ``(n, *data.shape[1:])``. When ``data`` is too short to produce a
        window, both arrays are empty but keep those trailing dimensions.

    Raises:
        ValueError: If ``sequence_length`` is negative (negative offsets
            would silently wrap around and pair windows with wrong targets).
    """
    if sequence_length < 0:
        raise ValueError(
            f"sequence_length must be non-negative, got {sequence_length}"
        )

    data = np.asarray(data)
    n_windows = max(len(data) - sequence_length, 0)

    # Row i of window_index holds the positions i, i + 1, ..., i + L - 1, so a
    # single fancy-indexing call gathers every window at once.
    window_index = (
        np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    )
    X = data[window_index]

    # Targets are the elements right after each window; copy so the result
    # does not alias the input array.
    y = data[sequence_length:sequence_length + n_windows].copy()
    return X, y

In [11]:
# Turn the scaled series into (window, next value) training pairs.
X, y = create_sequence(scaled, sequence_length)

# shuffle=False keeps the samples in their original order, so the test set is
# the trailing 20% of the windows rather than a random sample.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [12]:
class StockDataset(Dataset):
    """In-memory dataset pairing each input sample with its target.

    Both inputs are copied into ``torch.float`` tensors when the dataset is
    constructed; items are then served by plain indexing.
    """

    def __init__(self, x, y):
        """Store the inputs and targets as float tensors.

        Args:
            x: Array-like of input samples, indexed along its first axis.
            y: Array-like of targets with the same first-axis length as ``x``.

        Raises:
            ValueError: If ``x`` and ``y`` do not contain the same number of
                samples.
        """
        self.x = torch.tensor(x, dtype=torch.float)
        self.y = torch.tensor(y, dtype=torch.float)

        # Fail early: with mismatched lengths the dataset would either ignore
        # the surplus inputs silently or raise IndexError mid-iteration.
        if self.x.shape[:1] != self.y.shape[:1]:
            raise ValueError(
                "x and y must contain the same number of samples, got shapes "
                f"{tuple(self.x.shape)} and {tuple(self.y.shape)}"
            )

    def __getitem__(self, index):
        """Return the ``(input, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples (first-axis length of the targets)."""
        return len(self.y)
        
    

In [15]:
# Wrap the train / test arrays so a DataLoader can batch them.
train_dataset = StockDataset(x_train, y_train)
test_dataset = StockDataset(x_test, y_test)

# Training batches are shuffled so each epoch sees the windows in a new order.
train_dataloader = DataLoader(train_dataset, batch_size=5, shuffle=True)
# The test split is a chronological hold-out: keep its order so predictions
# collected from this loader stay aligned with the original time axis.
test_dataloader = DataLoader(test_dataset, batch_size=5, shuffle=False)

In [30]:
class LSTMModel(nn.Module):
    """LSTM followed by a linear layer that emits one value per sequence."""

    def __init__(self, input_size=1, hidden_size=64, num_layers=1):
        """Build the recurrent layer and the output projection.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        # batch_first=True: inputs and outputs are laid out (batch, step, feature).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        # Projects the hidden state down to a single output value.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the LSTM over ``x`` and project its last time step.

        Args:
            x: Tensor laid out as (batch, step, feature).

        Returns:
            Tensor with one output value per sequence in the batch.
        """
        # The second return value (final hidden / cell states) is not needed.
        sequence_out, _ = self.lstm(x)
        # Keep only the output of the final time step of every sequence.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)
        

In [39]:
# Model, loss function and optimizer used for training.
model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 100
for epoch in range(epochs):
    model.train()
    # Running sum of the per-batch losses for this epoch.
    total_loss = 0
    for x_batch, y_batch in train_dataloader:
        # Clear gradients left over from the previous step before computing
        # new ones.
        optimizer.zero_grad()
        out = model(x_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean batch loss on every tenth epoch (10, 20, ...).
    if epoch % 10 == 9:
        print(f"loss in epoch {epoch + 1}, loss {total_loss/len(train_dataloader):.5f}")
    

loss in epoch 10, loss 0.01331
loss in epoch 20, loss 0.00883
loss in epoch 30, loss 0.00684
loss in epoch 40, loss 0.00554
loss in epoch 50, loss 0.00610
loss in epoch 60, loss 0.00531
loss in epoch 70, loss 0.00448
loss in epoch 80, loss 0.00445
loss in epoch 90, loss 0.00379
loss in epoch 100, loss 0.00326


In [25]:
# Sample to understand x[:, -1, :]
#
# Each inner list is one sequence of three values; unsqueeze(-1) appends a
# trailing axis of size 1, giving a tensor of shape (3, 3, 1) that can be
# read as (batch, step, feature).
a = torch.tensor([
    [2, 1, 2],
    [2, 4, 0],
    [2, 4, 1],
]).unsqueeze(-1)

# Index -1 on the middle axis keeps only the last step of every sequence,
# so the result has shape (3, 1).
a[:, -1, :]


tensor([[2],
        [0],
        [1]])