In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the Tomita dataset: every 2-symbol binary sequence, labelled 1 when
# the two bits differ and 0 when they are equal.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])

# Add a unique START symbol (id 2) to the beginning of each input sequence.
# The START column must have one row per sequence: a single (1, 1) array
# cannot be concatenated with the (4, 2) data along axis=1.
start_symbol = np.full((X.shape[0], 1), 2, dtype=X.dtype)
X = np.concatenate((start_symbol, X), axis=1)

# Convert the dataset to tensors.
# The recurrent layer consumes (batch, seq_len, input_size) input, so each
# symbol id is one-hot encoded over the 3-symbol vocabulary {0, 1, START}.
num_symbols = 3
symbol_ids = torch.from_numpy(X).long()
X_tensor = nn.functional.one_hot(symbol_ids, num_classes=num_symbols).float()
y_tensor = torch.from_numpy(y).unsqueeze(1).float()

# Define the RNN model
class RNN(nn.Module):
    """Recurrent layer followed by a linear read-out of the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        output_size: number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one ``output_size`` vector per sequence in the batch ``x``."""
        batch_size = x.size(0)
        # Start every sequence from an all-zero hidden state on x's device.
        initial_hidden = torch.zeros(
            1, batch_size, self.hidden_size, device=x.device
        )
        step_outputs, _ = self.rnn(x, initial_hidden)
        # Only the hidden output at the final time step feeds the read-out.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

# Hyperparameters (input_size is 3 because of the added START symbol)
input_size, hidden_size, output_size = 3, 16, 1
learning_rate = 0.01
num_epochs = 1000

# Build the network from the sizes above
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Loss that takes raw logits, and the optimizer that updates the model weights
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the RNN model on the full dataset each epoch
for epoch in range(num_epochs):
    # Drop gradients accumulated by the previous update
    optimizer.zero_grad()

    # Forward pass and loss on the whole batch
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)

    # Backpropagate, then apply one optimizer update
    loss.backward()
    optimizer.step()

    # Report the loss once every 100 epochs (1-based epoch count)
    epochs_done = epoch + 1
    if epochs_done % 100 == 0:
        print(f'Epoch {epochs_done}/{num_epochs}, Loss: {loss.item():.4f}')

# Test the trained model
model.eval()
with torch.no_grad():
    # X_tensor already starts every sequence with the START symbol, so it is
    # fed to the model unchanged (torch.cat cannot take a NumPy array anyway).
    test_input = X_tensor
    test_output = model(test_input)
    # The model emits raw logits (it is trained with BCEWithLogitsLoss), so
    # they are mapped to probabilities before thresholding at 0.5.
    probabilities = torch.sigmoid(test_output)
    predicted_labels = (probabilities > 0.5).float().squeeze(1).numpy()
    print("Predicted Labels:", predicted_labels)