In [2]:
import numpy as np
import os


In [4]:

class LSTM:
    """Single bias-free LSTM cell operating on one concatenated input vector.

    The cell keeps its own cell state (``self.cs``) and most recent output
    (``self.y``) between calls to :meth:`forwardProp`. Each of the four gates
    is a plain matrix product with the input vector followed by a squashing
    function; there are no bias terms.
    """

    def __init__(self, input_size, output_size, recurrences, learning_rate):
        """Allocate state vectors, gate weights and RMSprop accumulators.

        Args:
            input_size: Size of the external part of the input vector. The
                vector actually consumed by the gates has length
                ``input_size + output_size`` (stored as ``self.input``).
            output_size: Length of the cell state and of the output vector.
            recurrences: Stored on the instance; not read by any method of
                this class.
            learning_rate: Stored on the instance; not read by any method of
                this class.
        """
        self.x = np.zeros(input_size + output_size)
        self.input = input_size + output_size
        self.y = np.zeros(output_size)
        self.output = output_size
        self.cs = np.zeros(output_size)
        self.recurrences = recurrences
        self.learning_rate = learning_rate

        # Weight matrices (forget, input, candidate, output), small random init.
        self.f = np.random.randn(output_size, self.input) * 0.1
        self.i = np.random.randn(output_size, self.input) * 0.1
        self.c = np.random.randn(output_size, self.input) * 0.1
        self.o = np.random.randn(output_size, self.input) * 0.1

        # RMSprop memory: one zero-initialised accumulator per weight matrix.
        self.Gf, self.Gi, self.Gc, self.Go = [
            np.zeros_like(m) for m in [self.f, self.i, self.c, self.o]
        ]

    def sigmoid(self, x):
        """Logistic function; the argument is clipped to [-500, 500] so ``exp`` cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def tangent(self, x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    def forwardProp(self, x_input):
        """Advance the cell by one time step.

        Args:
            x_input: 1-D vector of length ``self.input``.

        Returns:
            Tuple ``(cs, y, f, i, c_cand, o)``: the new cell state, the new
            output, and the forget, input, candidate and output gate
            activations for this step. ``cs`` and ``y`` are freshly allocated
            arrays, so values kept from earlier steps are not overwritten by
            later calls.

        Raises:
            ValueError: If ``x_input`` is not a 1-D vector of length
                ``self.input``.
        """
        x = np.asarray(x_input)
        # Reject anything but a flat vector: a column vector would otherwise
        # broadcast the (output,) state against (output, 1) gates into a matrix.
        if x.shape != (self.input,):
            raise ValueError(
                "x_input must have shape (%d,), got %s" % (self.input, x.shape)
            )
        self.x = x

        f = self.sigmoid(np.dot(self.f, x))
        i = self.sigmoid(np.dot(self.i, x))
        c_cand = self.tangent(np.dot(self.c, x))
        o = self.sigmoid(np.dot(self.o, x))

        # Rebind instead of updating in place: the previous state array may
        # still be held by the caller (e.g. a per-step history) and must not
        # change underneath it.
        self.cs = f * self.cs + i * c_cand
        self.y = o * self.tangent(self.cs)
        return self.cs, self.y, f, i, c_cand, o


In [5]:
# --- 2. DATA LOADING & PREPROCESSING ---
def load_and_process_data(file_path):
    """Read a text corpus and build its character vocabulary.

    If ``file_path`` does not exist, it is first created with a short built-in
    sample sentence so the rest of the notebook can run.

    Args:
        file_path: Path of the text file to read (and create if missing).

    Returns:
        Tuple ``(data, char_to_ix, ix_to_char, vocab_size)`` where ``data`` is
        the lower-cased file content, ``char_to_ix`` maps each distinct
        character to its index in sorted order, ``ix_to_char`` is the inverse
        mapping, and ``vocab_size`` is the number of distinct characters.
    """
    if not os.path.exists(file_path):
        with open(file_path, "w") as f:
            f.write("hello world. this is a test for the lstm network. hello again.")

    # Use a context manager so the read handle is closed deterministically.
    with open(file_path, 'r') as f:
        data = f.read().lower()

    chars = sorted(set(data))
    vocab_size = len(chars)
    char_to_ix = {ch: i for i, ch in enumerate(chars)}
    ix_to_char = {i: ch for i, ch in enumerate(chars)}

    return data, char_to_ix, ix_to_char, vocab_size



In [6]:
# --- 3. TRAINING & TESTING (SAMPLING) LOOP ---
def run_model():
    """Build an untrained LSTM for ``input.txt`` and print a 30-character sample.

    Loads (or creates) ``input.txt`` through ``load_and_process_data``, builds
    an ``LSTM`` whose input is the one-hot character concatenated with the
    previous hidden state, and prints text sampled from the untrained model.

    Raises:
        ValueError: If the corpus is empty, since there is no character to
            seed the sampler with.
    """
    # Setup
    data, char_to_ix, ix_to_char, vocab_size = load_and_process_data('input.txt')
    if not data:
        raise ValueError("input.txt is empty; at least one character is needed to seed sampling")

    # The sampler reads its logits from the first `vocab_size` hidden units,
    # so the hidden state must be at least that wide.
    hidden_size = max(100, vocab_size)
    seq_length = 25
    learning_rate = 0.1

    # Initialize LSTM
    # input_size to class = vocab_size (one-hot) + hidden_size
    model = LSTM(vocab_size, hidden_size, seq_length, learning_rate)

    # Simple Sampling/Testing Function
    def sample(h_prev, c_prev, seed_ix, n):
        """Sample ``n`` characters from the model.

        Args:
            h_prev: Initial hidden state, ``hidden_size`` values.
            c_prev: Initial cell state, ``hidden_size`` values.
            seed_ix: Vocabulary index of the first input character.
            n: Number of characters to generate.

        Returns:
            The generated characters joined into one string.
        """
        # Start from the requested cell state rather than whatever a previous
        # call left in the model; copy so the caller's array is never modified.
        model.cs = np.array(c_prev, dtype=float)
        h = np.asarray(h_prev, dtype=float).ravel()

        x = np.zeros(vocab_size)
        x[seed_ix] = 1
        ixes = []
        for _ in range(n):
            # Concatenate h and x for LSTM input
            combined_input = np.concatenate((h, x))
            _, h, *_ = model.forwardProp(combined_input)

            # Simple projection to vocab size (simplified for this standalone version)
            # In a full RNN, you'd have an extra Weight matrix here.
            # For this test, we just take the first 'vocab_size' bits of hidden state.
            logits = h[:vocab_size]
            p = np.exp(logits) / np.sum(np.exp(logits))
            ix = np.random.choice(vocab_size, p=p)

            # Feed the sampled character back in as the next one-hot input.
            x = np.zeros(vocab_size)
            x[ix] = 1
            ixes.append(ix)
        return ''.join(ix_to_char[k] for k in ixes)

    # Testing the untrained model
    h_init = np.zeros(hidden_size)
    c_init = np.zeros(hidden_size)
    print("--- Initial Test (Untrained) ---")
    print(sample(h_init, c_init, char_to_ix[data[0]], 30))
    print("-" * 30)

In [None]:
# Entry point: run the untrained-model demo only when this code is executed as
# the main module, not when it is imported from elsewhere.
if __name__ == "__main__":
    run_model()