In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [6]:
class rnn (nn.Module) :

    def __init__(self,hidden_size, input_size, output_size):
        super(rnn, self).__init__()

        self.hidden_size = hidden_size
        self.i2h = nn.Linear(hidden_size+input_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size , output_size)
        self.softmax = nn.LogSoftmax(dim = 1)

    def forward(self, input, hidden):
        combined = torch.cat((input,hidden),1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        
        return output, hidden

    def init_hidden(self):

        return torch.zeros(1, self.hidden_size)

In [7]:
n_samples = 1000
input_size = 15
output_size = 2
hidden_size = 20 


# Generate random data
X = torch.randn(n_samples, input_size)
y = torch.randint(0, output_size, (n_samples,))

train_dataset = TensorDataset(X, y)
train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True)

In [11]:
model = rnn(hidden_size, input_size, output_size)
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
n_epochs = 100

for epoch in range(n_epochs):
    for x_batch, y_batch in train_loader:
        hidden = model.init_hidden().repeat(x_batch.size(0), 1)  

        optimizer.zero_grad()
        output, hidden = model(x_batch, hidden)  

        loss = criterion(output, y_batch)  
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch: {epoch}  Loss: {loss.item()}')

Epoch: 0  Loss: 0.7219048738479614
Epoch: 10  Loss: 0.6429959535598755
Epoch: 20  Loss: 0.7288631796836853
Epoch: 30  Loss: 0.6572973132133484
Epoch: 40  Loss: 0.7310658693313599
Epoch: 50  Loss: 0.6076589822769165
Epoch: 60  Loss: 0.8018785715103149
Epoch: 70  Loss: 0.6949945092201233
Epoch: 80  Loss: 0.7895050048828125
Epoch: 90  Loss: 0.5974259376525879


# LSTM FROM SCRATCH 

In [None]:
import torch
import torch.nn as nn

class LSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LSTMCell, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        # Input gate
        self.W_i = nn.Linear(input_size + hidden_size, hidden_size)
        self.b_i = nn.Parameter(torch.zeros(hidden_size))

        # Forget gate
        self.W_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.b_f = nn.Parameter(torch.zeros(hidden_size))

        # Cell gate
        self.W_c = nn.Linear(input_size + hidden_size, hidden_size)
        self.b_c = nn.Parameter(torch.zeros(hidden_size))

        # Output gate
        self.W_o = nn.Linear(input_size + hidden_size, hidden_size)
        self.b_o = nn.Parameter(torch.zeros(hidden_size))

    def forward(self, x, h, c):
        combined = torch.cat((x, h), dim=1)

        f = torch.sigmoid(self.W_f(combined) + self.b_f)
        i = torch.sigmoid(self.W_i(combined) + self.b_i)
        
        g = torch.tanh(self.W_c(combined) + self.b_c)
        o = torch.sigmoid(self.W_o(combined) + self.b_o)

        c_new = f * c + i * g
        h_new = o * torch.tanh(c_new)

        return h_new, c_new

    def init_hidden(self, batch_size):
        return (torch.zeros(batch_size, self.hidden_size),
                torch.zeros(batch_size, self.hidden_size))


class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm_cell = LSTMCell(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        h, c = self.lstm_cell.init_hidden(batch_size)

        for t in range(seq_len):
            h, c = self.lstm_cell(x[:, t, :], h, c)

        output = self.fc(h)
        output = self.softmax(output)
        return output


# Hyperparameters
input_size = 10
hidden_size = 20
output_size = 2
n_epochs = 100
batch_size = 5

# Generate random data
n_samples = 1000
X = torch.randn(n_samples, 5, input_size)  # 5 is the sequence length
y = torch.randint(0, output_size, (n_samples,))

# Create DataLoader
train_dataset = torch.utils.data.TensorDataset(X, y)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, criterion, and optimizer
model = LSTM(input_size, hidden_size, output_size)
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(n_epochs):
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(x_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}')
