In [2]:
import numpy as np
class SimpleRNN:
    """Minimal recurrent network over integer token indices.

    The forward pass applies a tanh recurrence to one-hot encoded inputs and
    produces unnormalised scores (logits) over the vocabulary at every step.
    Training is deliberately simplified: only the output layer (``V`` and
    ``b_o``) is updated; ``W``, ``U`` and ``b_h`` keep their initial values.
    """

    def __init__(self, vocab_size: int, hidden_size: int = 5):
        """Create randomly initialised parameters.

        Args:
            vocab_size: Number of distinct token indices the model accepts.
            hidden_size: Dimension of the hidden state vector.
        """
        # Model parameters
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        # Weights: small Gaussian values; biases start at zero.
        self.W = np.random.randn(hidden_size, hidden_size) * 0.1  # hidden -> hidden
        self.U = np.random.randn(hidden_size, vocab_size) * 0.1   # input  -> hidden
        self.V = np.random.randn(vocab_size, hidden_size) * 0.1   # hidden -> output
        self.b_h = np.zeros((hidden_size, 1))                     # hidden bias
        self.b_o = np.zeros((vocab_size, 1))                      # output bias

        # Hidden state (column vector); reset at the start of every forward().
        self.h = np.zeros((hidden_size, 1))

    def forward(self, inputs):
        """Run the recurrence over a sequence of token indices.

        The hidden state is reset to zeros first, so calls are independent of
        each other. After the call ``self.h`` holds the final hidden state.

        Args:
            inputs: Iterable of integer token indices.

        Returns:
            A list with one ``(vocab_size, 1)`` array of logits per input
            token, in input order.

        Raises:
            IndexError: If an index is outside the vocabulary.
        """
        self.h = np.zeros((self.hidden_size, 1))
        outputs = []

        for x in inputs:
            # Multiplying U by a one-hot vector just selects column x of U,
            # so look the column up directly instead of building the vector.
            input_term = self.U[:, x].reshape(-1, 1)

            # Update hidden state
            self.h = np.tanh(input_term + np.dot(self.W, self.h) + self.b_h)

            # Compute output
            outputs.append(np.dot(self.V, self.h) + self.b_o)

        return outputs

    def train(self, sequences, targets, learning_rate: float = 0.1, epochs: int = 100):
        """Fit the output layer with per-example gradient descent.

        For every (sequence, target) pair the softmax cross-entropy loss of
        the logits at the last time step is computed and ``V`` / ``b_o`` are
        updated immediately. The mean loss is printed every 10th epoch.

        Args:
            sequences: Iterable of token-index sequences; must support
                ``len()``.
            targets: Target token index for each sequence. Pairs are formed
                with ``zip``, so extra items on either side are ignored.
            learning_rate: Step size of each update.
            epochs: Number of passes over the data.
        """
        for epoch in range(epochs):
            total_loss = 0

            for seq, target in zip(sequences, targets):
                # Forward pass; only the last step's logits are scored.
                logits = self.forward(seq)[-1]

                # Numerically stable softmax: subtracting the max logit leaves
                # the probabilities unchanged but keeps exp() from overflowing.
                shifted = logits - np.max(logits)
                exp_shifted = np.exp(shifted)
                sum_exp = np.sum(exp_shifted)
                probs = exp_shifted / sum_exp

                # Cross-entropy in log-softmax form, so the loss stays finite
                # even when probs[target] underflows to zero. The result has
                # shape (1,) because the logits are a column vector.
                loss = np.log(sum_exp) - shifted[target]
                total_loss += loss

                # Backward pass (simplified): d(loss)/d(logits) is
                # probs - one_hot(target); only the output layer is updated.
                d_logits = probs.copy()
                d_logits[target] -= 1.0

                # Update weights
                self.V -= learning_rate * np.dot(d_logits, self.h.T)
                self.b_o -= learning_rate * d_logits

            if epoch % 10 == 0 and len(sequences) > 0:
                print(f"Epoch {epoch}, Loss: {total_loss/len(sequences)}")

# Toy corpus: every training example is a window of `context_size` consecutive
# words, and its target is the word that immediately follows the window.
text = "i love neuroscience and i love brain science"
words = text.split()
# Sort the vocabulary so word indices are identical on every run; the
# iteration order of a set of strings can change between interpreter
# processes because string hashing is randomised.
vocab = sorted(set(words))
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))

# Create training data
context_size = 3  # number of words the model sees before predicting the next
sequences = []
targets = []
for start in range(len(words) - context_size):
    window = words[start:start + context_size]
    sequences.append([word_to_idx[w] for w in window])
    targets.append(word_to_idx[words[start + context_size]])

print("Training sequences:")
for seq, target in zip(sequences, targets):
    print(f"{[idx_to_word[i] for i in seq]} -> {idx_to_word[target]}")

# Create and train the RNN
rnn = SimpleRNN(len(vocab))
rnn.train(sequences, targets, epochs=100, learning_rate=0.1)

# Test prediction: the highest-scoring logit at the last step is the guess.
test_seq = [word_to_idx["i"], word_to_idx["love"], word_to_idx["neuroscience"]]
output = rnn.forward(test_seq)[-1]
predicted_idx = np.argmax(output)
print("\nInput: 'i love neuroscience'")
print(f"Predicted next word: '{idx_to_word[predicted_idx]}'")

Training sequences:
['i', 'love', 'neuroscience'] -> and
['love', 'neuroscience', 'and'] -> i
['neuroscience', 'and', 'i'] -> love
['and', 'i', 'love'] -> brain
['i', 'love', 'brain'] -> science
Epoch 0, Loss: [1.81762359]
Epoch 10, Loss: [1.66107739]
Epoch 20, Loss: [1.56371094]
Epoch 30, Loss: [1.48578308]
Epoch 40, Loss: [1.41818768]
Epoch 50, Loss: [1.35764752]
Epoch 60, Loss: [1.30259392]
Epoch 70, Loss: [1.25210375]
Epoch 80, Loss: [1.20554628]
Epoch 90, Loss: [1.16244462]

Input: 'i love neuroscience'
Predicted next word: 'and'
