In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the GRU model class
class GRUScratch(nn.Module):
    """GRU encoder followed by a linear read-out of the final time step.

    The recurrent layer is built with ``batch_first=True``, so inputs are
    laid out as ``(batch, time, num_inputs)``.

    Args:
        num_inputs: Number of features per time step.
        num_hiddens: Size of the GRU hidden state.
        num_outputs: Number of values produced by the linear head.
    """

    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super().__init__()
        self.num_hiddens = num_hiddens
        # Recurrent layer first, linear head second: the creation order
        # determines how random initialisation is consumed.
        self.gru = nn.GRU(
            input_size=num_inputs,
            hidden_size=num_hiddens,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=num_hiddens, out_features=num_outputs)

    def forward(self, x, h0=None):
        """Run the sequence through the GRU and project the last step.

        Args:
            x: Input batch, ``(batch, time, num_inputs)``.
            h0: Optional initial hidden state, handed to ``nn.GRU``
                unchanged (``None`` selects the layer's default).

        Returns:
            A ``(prediction, h_n)`` tuple: ``prediction`` is the linear head
            applied to the GRU output at the final time step, ``h_n`` is the
            final hidden state returned by ``nn.GRU``.
        """
        hidden_seq, h_n = self.gru(x, h0)
        last_step = hidden_seq[:, -1, :]  # keep only the final time step
        return self.fc(last_step), h_n

# Model dimensions: features per time step, GRU hidden width, and the number
# of regression targets predicted per sequence.
input_size, hidden_size, output_size = 10, 32, 1

# Build the network, then the loss and optimizer that train it.
model = GRUScratch(
    num_inputs=input_size,
    num_hiddens=hidden_size,
    num_outputs=output_size,
)
criterion = nn.MSELoss(reduction="mean")  # mean squared error (library default)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Example training loop (with dummy data)
# X_train / y_train are random placeholders; replace them with real tensors
# laid out the same way: inputs (sequences, timesteps, input_size) and
# targets (sequences, output_size).
X_train = torch.randn(100, 10, input_size)  # 100 sequences, 10 timesteps, input_size
y_train = torch.randn(100, output_size)     # 100 target values

# Single source of truth for the epoch count, so the loop bound and the
# progress message cannot drift apart.
num_epochs = 100

# Nothing inside the loop leaves training mode, so enable it once up front.
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()               # clear gradients from the previous step
    outputs, _ = model(X_train)         # full-batch forward pass; hidden state unused
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Report progress every 10th epoch (epochs are counted from 0).
    if epoch % 10 == 0:
        print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}")

# Inference demo: predict for one random sequence of 10 time steps.
test_input = torch.randn(1, 10, input_size)

model.eval()  # switch the module to evaluation mode
with torch.no_grad():  # no gradient tracking is needed for a prediction
    predicted_output, _ = model(test_input)

print("Predicted output:", predicted_output)

Epoch [0/100], Loss: 1.0203
Epoch [10/100], Loss: 0.9600
Epoch [20/100], Loss: 0.9149
Epoch [30/100], Loss: 0.8710
Epoch [40/100], Loss: 0.8210
Epoch [50/100], Loss: 0.7607
Epoch [60/100], Loss: 0.6868
Epoch [70/100], Loss: 0.6009
Epoch [80/100], Loss: 0.5072
Epoch [90/100], Loss: 0.4023
Predicted output: tensor([[0.9185]])
