In [1]:
from dataset_creation import TextDataset
from torch.utils.data import DataLoader
import numpy as np

# Build the dataset from the token directory, requesting sequence_length=100.
# NOTE(review): how TextDataset slices the corpus into items is defined in
# dataset_creation, not in this notebook — confirm there.
text_dataset = TextDataset(directory='data/SPGC-tokens-2018-07-18/', sequence_length=100)
print(f"Dataset created with {len(text_dataset)} sequences.")

# Wrap the dataset in a DataLoader; no sampler or shuffle argument is passed.
dataloader = DataLoader(text_dataset, batch_size=1)

# Sanity check: decode and print the first two batches.
# Each batch unpacks into two parts; only the second (`inputs`) is used below.
for i, (sequences, inputs) in enumerate(dataloader):
    if i >= 2:  # Raise or lower this bound to preview more or fewer batches
        break

    print(f"\nBatch {i+1}")
    print(f"Inputs shape: {inputs.shape}")

    # Map every index in the first row of the batch back to its character via
    # idx_to_char and join them into readable text (comment out if too verbose).
    sequence = ''.join([text_dataset.idx_to_char[int(idx)] for idx in inputs[0]])
    print(f"Sequence: {sequence}")


Dataset created with 18422222637 sequences.

Batch 1
Inputs shape: torch.Size([1, 100])
Sequence: e what you say things have gone too far to stop here did you say that spotswoode knew something abou

Batch 2
Inputs shape: torch.Size([1, 100])
Sequence:  elder negroes mam chloe whatever may have been her reserved rights of private judgment backed him u


In [2]:
import torch

# The vocabulary is the key set of the dataset's char -> index mapping.
chars = list(text_dataset.char_to_idx.keys())

# n_characters is reused by later cells as the one-hot width and as the
# model's input/output size.
n_characters = len(chars)  # Number of unique characters
print(f"Number of unique characters: {n_characters}")
print(f"Characters: {chars}")

Number of unique characters: 70
Characters: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '.', ';', "'", '"', '?', '!', ' ']


In [3]:
import torch.nn as nn

class HebbianLinear(nn.Linear):
    """Linear layer that records a Hebbian "imprint" of its latest forward pass.

    Every call to ``forward`` overwrites ``imprints`` with the sum, over all
    samples in the call, of the outer product ``output x input``. The result
    has the same ``[out_features, in_features]`` shape as ``weight``.
    ``apply_imprints`` later adds a reward-scaled copy of that matrix to
    ``weight``.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        bias: Forwarded unchanged to ``nn.Linear``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(HebbianLinear, self).__init__(in_features, out_features, bias)
        # Kept as a Parameter so it still shows up in ``parameters()`` and
        # ``state_dict()`` exactly as before (code that iterates over the
        # parameters therefore also sees the imprints). It is only ever
        # written directly; no gradient flows into it.
        self.imprints = nn.Parameter(torch.zeros_like(self.weight))

    def forward(self, input):
        """Apply the linear map and record the imprint of this call.

        Args:
            input: Tensor whose last dimension is ``in_features``.

        Returns:
            The ordinary ``nn.Linear`` output for ``input``.
        """
        output = super(HebbianLinear, self).forward(input)
        self.update_imprints(input, output)
        return output

    def update_imprints(self, input, output):
        """Overwrite ``imprints`` with ``sum_n outer(output[n], input[n])``.

        All leading dimensions are flattened into one sample axis, so 1-D,
        2-D and higher-rank inputs are all accepted. The computation runs
        outside autograd: the imprint is bookkeeping, not part of the loss.

        Args:
            input: The tensor that was passed to ``forward``.
            output: The tensor ``forward`` produced for ``input``.
        """
        with torch.no_grad():
            dtype = self.imprints.dtype
            flat_in = input.detach().reshape(-1, self.in_features).to(dtype)
            flat_out = output.detach().reshape(-1, self.out_features).to(dtype)
            # [out, N] @ [N, in] equals the batch-summed outer product without
            # materializing an intermediate [N, out, in] tensor.
            self.imprints.copy_(flat_out.t() @ flat_in)

    def apply_imprints(self, reward, learning_rate, imprint_rate):
        """Add the stored imprint, scaled by the reward, to ``weight``.

        The update is ``weight += reward * (learning_rate + imprint_rate) * imprints``.
        No normalization is applied to the imprint. A positive ``reward``
        strengthens the recorded input/output co-activation and a negative
        one weakens it.

        Args:
            reward: Signed scalar modulating the update.
            learning_rate: First step-size term.
            imprint_rate: Second step-size term; it is simply added to
                ``learning_rate``.
        """
        scale = reward * (learning_rate + imprint_rate)
        # Written through ``.data`` (as before) so the update stays invisible
        # to autograd.
        self.weight.data.add_(scale * self.imprints.data)


# Smoke test: a small HebbianLinear mapping 10 input features to 5 outputs.
layer = HebbianLinear(in_features=10, out_features=5)

# The imprint matrix is created with zeros_like(weight), so the shapes must match.
print("Shape of weights:", layer.weight.shape)
print("Shape of imprints:", layer.imprints.shape)
print("Are the shapes identical?", layer.weight.shape == layer.imprints.shape)

# Random input (unseeded, so the printed numbers differ from run to run).
input_data = torch.randn(3, 10)  # Batch size of 3, input features 10

# The forward pass also records the imprint for this batch as a side effect.
output = layer(input_data)

# Show the weights before and after applying the imprint with a positive reward.
print("Weights:\n ", layer.weight)
layer.apply_imprints(reward=0.5, learning_rate=0.1, imprint_rate=0.1)
print("Weights after imprint:\n ", layer.weight)

Shape of weights: torch.Size([5, 10])
Shape of imprints: torch.Size([5, 10])
Are the shapes identical? True
Weights:
  Parameter containing:
tensor([[-0.1377, -0.0629,  0.0757,  0.0547, -0.2133, -0.1716, -0.0403, -0.0059,
         -0.1945,  0.0075],
        [ 0.2724, -0.1911,  0.1146, -0.2831, -0.0203, -0.1604, -0.2103,  0.0553,
          0.1948,  0.2969],
        [-0.1136,  0.0831,  0.1487, -0.1349, -0.0984,  0.1712,  0.2403, -0.2754,
          0.0765, -0.1830],
        [ 0.3091, -0.0926,  0.1333, -0.2558, -0.2457, -0.2646, -0.1673, -0.0724,
          0.2906,  0.0629],
        [-0.2091,  0.2978, -0.1066,  0.1143,  0.2084,  0.0126,  0.0767,  0.1859,
         -0.1296, -0.0390]], requires_grad=True)
Weights after imprint:
  Parameter containing:
tensor([[-1.8243e-01,  5.3678e-02,  1.4110e-01,  5.7566e-02, -2.9015e-01,
         -2.2302e-01, -8.8420e-02,  1.2002e-01, -2.4858e-01, -5.0518e-02],
        [ 4.1269e-01, -4.2201e-01,  1.3695e-01, -3.6886e-01,  1.2698e-01,
         -1.9216e-01, -

In [4]:
import torch.nn.functional as F

class SimpleRNN(torch.nn.Module):
    """Recurrent cell assembled entirely from ``HebbianLinear`` layers.

    One step concatenates the current input with the previous hidden state,
    runs the result through a stack of ReLU-activated layers, and then
    projects the stack's output twice: once to the next hidden state and once
    to log-probabilities over the output classes.

    Args:
        input_size: Width of one input vector.
        hidden_size: Width of the hidden state and of every stacked layer.
        output_size: Number of output classes.
        num_layers: Requested stack depth. The first layer is always created;
            ``num_layers - 1`` further layers are added after it.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # The entry layer consumes [input, hidden]; every deeper layer maps
        # hidden -> hidden.
        entry_layer = HebbianLinear(input_size + hidden_size, hidden_size)
        deeper_layers = [
            HebbianLinear(hidden_size, hidden_size) for _ in range(1, num_layers)
        ]
        self.linear_layers = torch.nn.ModuleList([entry_layer, *deeper_layers])

        # Heads that read the stack's final activation.
        self.i2h = HebbianLinear(hidden_size, hidden_size)
        self.i2o = HebbianLinear(hidden_size, output_size)
        self.softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: Current input; concatenated with ``hidden`` along dim 1.
            hidden: Hidden state returned by the previous step (or by
                ``initHidden`` for the first step).

        Returns:
            ``(output, hidden)``: log-softmax scores over dim 1, and the next
            hidden state.
        """
        features = torch.cat((input, hidden), dim=1)

        for hebbian_layer in self.linear_layers:
            features = F.relu(hebbian_layer(features))

        next_hidden = self.i2h(features)
        log_probs = self.softmax(self.i2o(features))
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``[1, hidden_size]``."""
        return torch.zeros(1, self.hidden_size)

    def apply_imprints(self, reward, learning_rate, imprint_rate):
        """Forward the reward-modulated update to every ``HebbianLinear`` in the model.

        The stacked layers are updated first, then ``i2h``, then ``i2o``.
        """
        every_layer = [*self.linear_layers, self.i2h, self.i2o]
        for hebbian_layer in every_layer:
            hebbian_layer.apply_imprints(reward, learning_rate, imprint_rate)


# Inputs are one-hot characters and outputs are scores over the same
# vocabulary, so both sizes equal the vocabulary size.
input_size = n_characters
output_size = n_characters
n_hidden = 128
# The last positional argument (3) is num_layers.
rnn = SimpleRNN(input_size, n_hidden, output_size,3)

# NLLLoss pairs with the LogSoftmax output of the model. No optimizer is
# created: the Adam line below is intentionally left disabled.
criterion = torch.nn.NLLLoss()
# optimizer = torch.optim.Adam(rnn.parameters(), lr=0.005)



def clip_weights(model, max_norm):
    """Clamp every parameter of ``model`` element-wise, in place.

    Despite the argument name, no norm is computed: each individual value is
    limited to the interval ``[-max_norm, max_norm]``.

    Args:
        model: Module whose ``parameters()`` are clamped.
        max_norm: Bound applied symmetrically to each element.
    """
    lower, upper = -max_norm, max_norm
    with torch.no_grad():
        for tensor in model.parameters():
            tensor.data.clamp_(min=lower, max=upper)

# Clamp the freshly constructed model once here; train() calls clip_weights
# again on every step.
clip_weights(rnn, max_norm=0.5)  # Element-wise bound; choose an appropriate value


In [5]:
import torch

def letterToIndex(letter):
    """Return the integer id that ``text_dataset.char_to_idx`` holds for ``letter``."""
    index_of = text_dataset.char_to_idx
    return index_of[letter]

def letterToTensor(letter):
    """One-hot encode a single character as a ``<1 x n_characters>`` tensor.

    Intended for demonstration: the row is all zeros except for a 1 at the
    position given by ``letterToIndex(letter)``.
    """
    one_hot = torch.zeros(1, n_characters)
    hot_column = letterToIndex(letter)
    one_hot[0][hot_column] = 1
    return one_hot

def lineToTensor(line):
    """One-hot encode a string as a ``<len(line) x 1 x n_characters>`` tensor.

    Row ``k`` holds the one-hot vector of the ``k``-th character of ``line``.
    """
    n_rows = len(line)
    encoded = torch.zeros(n_rows, 1, n_characters)
    for position in range(n_rows):
        encoded[position][0][letterToIndex(line[position])] = 1
    return encoded

# 'J' encoded as a single one-hot row of width n_characters.
print(letterToTensor('J'))

# 'Jones' yields one one-hot row per character; only the resulting size is printed.
print(lineToTensor('Jones').size())

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([5, 1, 70])


In [6]:
# Inspect one raw dataset item by index; the cell displays whatever
# TextDataset returns for it.
text_dataset[3]

('es i repeated aloud in dancing along the sentence sounded important and pleased my ears presently i ',
 tensor([ 4, 18, 69,  8, 69, 17,  4, 15,  4,  0, 19,  4,  3, 69,  0, 11, 14, 20,
          3, 69,  8, 13, 69,  3,  0, 13,  2,  8, 13,  6, 69,  0, 11, 14, 13,  6,
         69, 19,  7,  4, 69, 18,  4, 13, 19,  4, 13,  2,  4, 69, 18, 14, 20, 13,
          3,  4,  3, 69,  8, 12, 15, 14, 17, 19,  0, 13, 19, 69,  0, 13,  3, 69,
         15, 11,  4,  0, 18,  4,  3, 69, 12, 24, 69,  4,  0, 17, 18, 69, 15, 17,
          4, 18,  4, 13, 19, 11, 24, 69,  8, 69]))

In [7]:
def randomTrainingExample():
    """Draw one item uniformly at random from ``text_dataset``.

    Returns:
        ``(sequence, line_tensor)``: the two components of the selected
        dataset item, in the order the dataset yields them.
    """
    # The dataset can hold more items than fit in a 32-bit integer, and
    # np.random.randint rejects such an upper bound wherever its default
    # integer type is 32 bits wide. Request int64 explicitly.
    index = int(np.random.randint(len(text_dataset), dtype=np.int64))
    sequence, line_tensor = text_dataset[index]
    return sequence, line_tensor

# Display one randomly drawn example (unseeded, so it differs on every run).
randomTrainingExample()

('eks but his eyes like crystal clear with truth and the woman who knew not that she was a widow but f',
 tensor([ 4, 10, 18, 69,  1, 20, 19, 69,  7,  8, 18, 69,  4, 24,  4, 18, 69, 11,
          8, 10,  4, 69,  2, 17, 24, 18, 19,  0, 11, 69,  2, 11,  4,  0, 17, 69,
         22,  8, 19,  7, 69, 19, 17, 20, 19,  7, 69,  0, 13,  3, 69, 19,  7,  4,
         69, 22, 14, 12,  0, 13, 69, 22,  7, 14, 69, 10, 13,  4, 22, 69, 13, 14,
         19, 69, 19,  7,  0, 19, 69, 18,  7,  4, 69, 22,  0, 18, 69,  0, 69, 22,
          8,  3, 14, 22, 69,  1, 20, 19, 69,  5]))

In [8]:
# Both rates are passed to rnn.apply_imprints() by train().
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
imprint_rate = 0.1
# Sliding window of recent rewards, mutated in place by train(). It starts
# with a single 0, so early averages are pulled toward zero.
last_n_rewards = [0]
# Never written by train(): that function assigns a local of the same name
# without declaring it global, so this value stays 0.
last_n_reward_avg = 0
# Maximum number of rewards kept in last_n_rewards.
n_rewards = 100
def train(line_tensor):
    """Run one reward-modulated Hebbian update on a single encoded sequence.

    All ids except the last are fed to ``rnn`` one character at a time, and
    the final output is scored against the last id. The loss is turned into
    a reward, a running average of recent rewards is subtracted as a
    baseline, and the difference drives ``rnn.apply_imprints``.

    Args:
        line_tensor: Tensor of character ids. The last entry is the
            prediction target; everything before it is input.

    Returns:
        ``(output, loss)``: the model's log-probabilities after the final
        input character, and the loss as a Python float.

    Raises:
        ValueError: If ``line_tensor`` has fewer than two entries, so there
            is no input step to produce an output from.
    """
    n_steps = line_tensor.size()[0] - 1
    if n_steps < 1:
        raise ValueError(
            "line_tensor must contain at least two character ids (inputs plus a target)"
        )

    # One-hot encode every input character once instead of once per step.
    one_hot_inputs = torch.nn.functional.one_hot(
        line_tensor[:-1], num_classes=n_characters
    ).type(torch.float)

    # Learning happens through apply_imprints(), not backpropagation, so do
    # not build an autograd graph across the unrolled steps. Remove this
    # no_grad block if a gradient-based optimizer is ever reintroduced.
    with torch.no_grad():
        hidden = rnn.initHidden()
        for i in range(n_steps):
            output, hidden = rnn(one_hot_inputs[i].unsqueeze(0), hidden)
        loss = criterion(output, line_tensor[-1].unsqueeze(0))
    loss_value = loss.item()

    # Map the non-negative loss into a reward in (0, 1]: lower loss, higher reward.
    reward = 1 / (1 + loss_value)

    # Maintain the sliding reward window. The current reward is appended
    # before averaging, so it is part of its own baseline.
    last_n_rewards.append(reward)
    if len(last_n_rewards) > n_rewards:
        last_n_rewards.pop(0)
    baseline = sum(last_n_rewards) / len(last_n_rewards)
    reward_update = reward - baseline

    # Clamp before the update. This also bounds the stored imprints, because
    # they are registered as parameters of the model.
    clip_weights(rnn, max_norm=0.5)

    # Apply Hebbian updates
    rnn.apply_imprints(reward_update, learning_rate, imprint_rate)

    return output, loss_value


In [9]:
import time
import math

# Total number of training steps, and how often (in steps) to print a
# progress line and to record an averaged loss point.
n_iters = 100000
print_every = 50
plot_every = 10



# current_loss accumulates the loss between two recorded points;
# all_losses collects one averaged value per plot_every steps.
current_loss = 0
all_losses = []

def timeSince(since):
    """Format the time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    Args:
        since: A timestamp previously obtained from ``time.time()``.

    Returns:
        The elapsed wall-clock time as whole minutes plus the remaining
        seconds, with the seconds truncated to an integer.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

start = time.time()

# The counter is named `step` rather than `iter` so the builtin iter() is not
# overwritten in the notebook's namespace.
for step in range(1, n_iters + 1):
    sequence, line_tensor = randomTrainingExample()
    output, loss = train(line_tensor)
    current_loss += loss

    # Print step number, progress, elapsed time, loss, the sequence text and the guess
    if step % print_every == 0:
        # The highest-scoring class of the final output is the predicted character
        topv, topi = output.topk(1, dim=1)
        predicted_char = text_dataset.idx_to_char[topi[0, 0].item()]
        # The last character of the sequence text is treated as the target
        target_char = sequence[-1]
        correct = '✓' if predicted_char == target_char else '✗ (%s)' % target_char
        print('%d %d%% (%s) %.4f %s / %s %s' % (step, step / n_iters * 100, timeSince(start), loss, sequence, predicted_char, correct))

    # Record the mean loss of the last plot_every steps, then reset the accumulator
    if step % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

50 0% (0m 1s) 4.2209 �龍率眾見師兄 說情由放走二班頭 斷魂山巧遇獨行俠 人認識我 鄧飛雄訪友走天涯 黃花鋪救人打惡棍 英雄奮勇鬥群賊 惡霸安心施詭計 鄭華雄慷慨救友 惡匪棍見色起心 定巧計曹先生受贓 囑賊人惡家奴弄權 你 / f ✗ (你)
100 0% (0m 3s) 4.3242  arline pushed away the proffered money impatiently time are you goin to tell how much you need lem  / f ✗ ( )
150 0% (0m 5s) 4.3460  the broad frozen river and upon its icy surface it was so deep that meadow and river were undisting / f ✗ (g)
200 0% (0m 6s) 4.2411 eir mill running full force for more than six months is this hunter an expert also oh no parkinson s / f ✗ (s)
250 0% (0m 8s) 4.2205 nd unwatched or she will accomplish nothing one will be in waiting who will restore the and claim th / f ✗ (h)
300 0% (0m 9s) 4.3121 ent et maintenant il vous faut ma vie et vous me tuez grâce monsieur le duc norbert poussa un cri te / f ✗ (e)
350 0% (0m 11s) 4.2338 t in pitting his talent against that of betterton there was no doubt that a couple of years ago harr / f ✗ (r)
400 0% (0m 13s) 4.2226 put butter in a stewpan set it on the fire whe

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded loss curve: one point per plot_every training steps,
# each being the mean loss over that window.
plt.figure()
plt.plot(all_losses)