In [1]:
from dataset_creation import TextDataset
from torch.utils.data import DataLoader
import numpy as np

# Build the character-level dataset from the token directory
text_dataset = TextDataset(
    directory='data/SPGC-tokens-2018-07-18/',
    sequence_length=100,
)
print(f"Dataset created with {len(text_dataset)} sequences.")

# Plain sequential loader: one item per batch, no sampler
dataloader = DataLoader(text_dataset, batch_size=1)

# Preview the first couple of batches
for i, (sequences, inputs) in enumerate(dataloader):
    if i >= 2:  # Raise this bound to preview more batches
        break

    print(f"\nBatch {i+1}")
    print(f"Inputs shape: {inputs.shape}")

    # Decode the first (and only) row of the batch back into text
    sequence = ''.join(text_dataset.idx_to_char[code] for code in inputs[0].tolist())
    print(f"Sequence: {sequence}")


Dataset created with 18422222637 sequences.

Batch 1
Inputs shape: torch.Size([1, 100])
Sequence: ked to stay indoors and piece bedquilts and knit stockings and read aloud to my mother i never willi

Batch 2
Inputs shape: torch.Size([1, 100])
Sequence: lle eyes were two dancing stars she clapped her hands in riotous glee without a word she untied the 


In [2]:
import torch

# The vocabulary is the set of keys of the dataset's char -> index mapping
chars = list(text_dataset.char_to_idx)

# Vocabulary size; reused below as the model's input/output width
n_characters = len(chars)
print(f"Number of unique characters: {n_characters}")
print(f"Characters: {chars}")

Number of unique characters: 70
Characters: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '.', ';', "'", '"', '?', '!', ' ']


In [3]:
import torch.nn as nn

class HebbianLinear(nn.Linear):
    """Linear layer that records a Hebbian "imprint" on every forward pass.

    The imprint has the same shape as ``weight``. After each forward call it
    holds the sum, over every input row of that call, of the outer product
    ``output x input``. It is overwritten (not accumulated) on each call, so
    it always reflects the most recent forward pass only.

    ``apply_imprints`` adds the stored imprint to the weights, scaled by a
    reward and a learning rate.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__(in_features, out_features, bias)
        # Still a Parameter, so it keeps showing up in ``parameters()`` and
        # ``state_dict()``; it is only ever written through ``.data``, so it
        # does not need gradient tracking.
        self.imprints = nn.Parameter(torch.zeros_like(self.weight), requires_grad=False)

    def forward(self, input):
        """Run the linear map and record the imprint of this call.

        Args:
            input: tensor whose last dimension is ``in_features``.

        Returns:
            The usual ``nn.Linear`` output for ``input``.
        """
        output = super().forward(input)
        self.update_imprints(input, output)
        return output

    def update_imprints(self, input, output):
        """Store ``sum_n output[n] (outer) input[n]`` in ``self.imprints``.

        All leading dimensions are flattened into a single "row" dimension,
        so unbatched ``(in_features,)`` inputs and inputs with extra leading
        dimensions are handled the same way as a plain ``(batch, in_features)``
        input.

        Args:
            input: the tensor passed to ``forward``.
            output: the tensor ``forward`` produced for ``input``.
        """
        with torch.no_grad():
            flat_input = input.detach().reshape(-1, self.in_features)
            flat_output = output.detach().reshape(-1, self.out_features)
            # (out_features, N) @ (N, in_features) is exactly the sum of the N
            # per-row outer products, without a [N, out, in] intermediate.
            self.imprints.data = flat_output.t() @ flat_input

    def apply_imprints(self, reward, learning_rate):
        """Add the stored imprint to the weights in place.

        Args:
            reward: scalar multiplier; a positive value strengthens the
                imprinted connections, a negative value weakens them.
            learning_rate: scalar step size.
        """
        with torch.no_grad():
            self.weight.data += reward * learning_rate * self.imprints.data


# Smoke test: a small layer mapping 10 features to 5
layer = HebbianLinear(in_features=10, out_features=5)

# The imprint matrix must mirror the weight matrix
weight_shape, imprint_shape = layer.weight.shape, layer.imprints.shape
print("Shape of weights:", weight_shape)
print("Shape of imprints:", imprint_shape)
print("Are the shapes identical?", weight_shape == imprint_shape)

# Three random rows of ten features each
input_data = torch.randn(3, 10)

# The forward pass records the imprint as a side effect
output = layer(input_data)

# Show the weights before and after applying the recorded imprint
print("Weights:\n ", layer.weight)
layer.apply_imprints(reward=0.5, learning_rate=0.1)
print("Weights after imprint:\n ", layer.weight)

Shape of weights: torch.Size([5, 10])
Shape of imprints: torch.Size([5, 10])
Are the shapes identical? True
Weights:
  Parameter containing:
tensor([[ 0.2057,  0.0184, -0.0532, -0.2423, -0.1152,  0.2199,  0.0706,  0.1025,
          0.2512,  0.1289],
        [-0.1553,  0.1646, -0.2277,  0.0446, -0.2705, -0.0454, -0.0052,  0.1286,
          0.0754, -0.1740],
        [-0.1229, -0.2632, -0.1724,  0.2134, -0.2646,  0.1718, -0.1688, -0.2379,
          0.0897,  0.2443],
        [ 0.1675,  0.0302, -0.0020,  0.2903,  0.3014,  0.3121,  0.0762, -0.2685,
         -0.3103, -0.1092],
        [ 0.0512, -0.2552,  0.0375,  0.0566,  0.1504,  0.1837,  0.2226, -0.0789,
         -0.2185, -0.1551]], requires_grad=True)
Weights after imprint:
  Parameter containing:
tensor([[ 0.2327,  0.0167, -0.0805, -0.2949, -0.2308,  0.2357,  0.0302,  0.1919,
          0.3288,  0.1569],
        [-0.2179,  0.1636, -0.1931,  0.0697, -0.3843, -0.0942,  0.0114,  0.3153,
          0.1237, -0.1191],
        [-0.0410, -0.2689, -

In [4]:
import torch.nn.functional as F

class SimpleRNN(torch.nn.Module):
    """Recurrent cell built entirely from ``HebbianLinear`` layers.

    Each step concatenates the input with the previous hidden state, passes
    the result through ``num_layers`` Hebbian linear layers (ReLU after each),
    and then feeds that into two heads: ``i2h`` produces the next hidden state
    and ``i2o`` produces log-probabilities over the output classes.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        """
        Args:
            input_size: width of each input row.
            hidden_size: width of the hidden state and of every inner layer.
            output_size: number of output classes.
            num_layers: number of stacked inner layers; at least one layer is
                always created.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # The first layer consumes [input, hidden]; the rest are hidden -> hidden
        self.linear_layers = torch.nn.ModuleList([HebbianLinear(input_size + hidden_size, hidden_size)])
        for _ in range(1, num_layers):
            self.linear_layers.append(HebbianLinear(hidden_size, hidden_size))

        # Output heads for the next hidden state and the class scores
        self.i2h = HebbianLinear(hidden_size, hidden_size)
        self.i2o = HebbianLinear(hidden_size, output_size)
        self.softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: ``(batch, input_size)`` tensor.
            hidden: ``(batch, hidden_size)`` tensor from the previous step.

        Returns:
            ``(output, hidden)``: log-probabilities of shape
            ``(batch, output_size)`` and the next hidden state. The hidden
            state is the raw ``i2h`` output, with no activation applied.
        """
        features = torch.cat((input, hidden), dim=1)

        for layer in self.linear_layers:
            features = F.relu(layer(features))

        next_hidden = self.i2h(features)
        log_probs = self.softmax(self.i2o(features))
        return log_probs, next_hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        The tensor is created with the dtype and device of the model's own
        weights, so it remains usable after ``model.to(...)`` or
        ``model.double()``.

        Args:
            batch_size: number of rows; defaults to 1.
        """
        reference = self.i2h.weight
        return torch.zeros(batch_size, self.hidden_size, dtype=reference.dtype, device=reference.device)

    def apply_imprints(self, reward, learning_rate):
        """Apply the stored imprint of every Hebbian layer to its weights.

        Args:
            reward: scalar forwarded to each layer's ``apply_imprints``.
            learning_rate: scalar forwarded to each layer's ``apply_imprints``.
        """
        for layer in (*self.linear_layers, self.i2h, self.i2o):
            layer.apply_imprints(reward, learning_rate)


# Hidden width of the recurrent cell
n_hidden = 128

# One-hot characters in, character scores out: both are vocabulary-sized
input_size = output_size = n_characters
rnn = SimpleRNN(input_size, n_hidden, output_size, num_layers=3)

# Negative log-likelihood, paired with the model's log-softmax output
criterion = torch.nn.NLLLoss()
# optimizer = torch.optim.Adam(rnn.parameters(), lr=0.005)



# Apply Clipping
def clip_weights(model, max_norm):
    """Clamp every parameter of ``model`` element-wise into [-max_norm, max_norm].

    Despite the argument name this is a per-element clamp, not a norm-based
    clip. It is applied in place to every tensor yielded by
    ``model.parameters()``.
    """
    lower, upper = -max_norm, max_norm
    with torch.no_grad():
        for values in (p.data for p in model.parameters()):
            values.clamp_(min=lower, max=upper)

# Clamp the freshly initialised parameters once, right after the model is built
clip_weights(model=rnn, max_norm=0.5)  # Choose an appropriate max_norm value


In [5]:
import torch

# Find a letter's index in the dataset's char_to_idx mapping, e.g. "a" = 0
def letterToIndex(letter):
    """Return the vocabulary index the dataset assigns to ``letter``."""
    mapping = text_dataset.char_to_idx
    return mapping[letter]

# Just for demonstration, turn a letter into a <1 x n_characters> Tensor
def letterToTensor(letter):
    """One-hot encode a single letter as a ``(1, n_characters)`` float tensor."""
    one_hot = torch.zeros(1, n_characters)
    one_hot[0, letterToIndex(letter)] = 1
    return one_hot

# Turn a line into a <line_length x 1 x n_characters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    """One-hot encode a string as a ``(len(line), 1, n_characters)`` float tensor."""
    encoded = torch.zeros(len(line), 1, n_characters)
    position = 0
    for letter in line:
        encoded[position, 0, letterToIndex(letter)] = 1
        position += 1
    return encoded

# A single letter as a one-hot row
print(letterToTensor('J'))

# A whole word as a stack of one-hot rows
print(lineToTensor('Jones').shape)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([5, 1, 70])


In [6]:
# Inspect one raw dataset item; the cell output shows it is a (text, index tensor) pair
text_dataset[3]

('em entering into my humor she improvised a cockpit by spreading her upon the ground and i liberated ',
 tensor([ 4, 12, 69,  4, 13, 19,  4, 17,  8, 13,  6, 69,  8, 13, 19, 14, 69, 12,
         24, 69,  7, 20, 12, 14, 17, 69, 18,  7,  4, 69,  8, 12, 15, 17, 14, 21,
          8, 18,  4,  3, 69,  0, 69,  2, 14,  2, 10, 15,  8, 19, 69,  1, 24, 69,
         18, 15, 17,  4,  0,  3,  8, 13,  6, 69,  7,  4, 17, 69, 20, 15, 14, 13,
         69, 19,  7,  4, 69,  6, 17, 14, 20, 13,  3, 69,  0, 13,  3, 69,  8, 69,
         11,  8,  1,  4, 17,  0, 19,  4,  3, 69]))

In [7]:
def randomTrainingExample():
    """Draw one item uniformly at random from ``text_dataset``.

    Returns:
        The ``(sequence, line_tensor)`` pair stored at the sampled index.
    """
    n_examples = len(text_dataset)
    # The dataset reports about 1.8e10 sequences, which does not fit in 32 bits.
    # Request int64 explicitly so sampling also works where NumPy's default
    # integer is 32-bit, and hand the dataset a plain Python int.
    index = int(np.random.randint(n_examples, dtype=np.int64))
    sequence, line_tensor = text_dataset[index]
    return sequence, line_tensor

# Draw one example to check the helper's output
randomTrainingExample()

('wn i will to rome go thou and with thy train pursue mithridates till he be slain lucullus with fortu',
 tensor([22, 13, 69,  8, 69, 22,  8, 11, 11, 69, 19, 14, 69, 17, 14, 12,  4, 69,
          6, 14, 69, 19,  7, 14, 20, 69,  0, 13,  3, 69, 22,  8, 19,  7, 69, 19,
          7, 24, 69, 19, 17,  0,  8, 13, 69, 15, 20, 17, 18, 20,  4, 69, 12,  8,
         19,  7, 17,  8,  3,  0, 19,  4, 18, 69, 19,  8, 11, 11, 69,  7,  4, 69,
          1,  4, 69, 18, 11,  0,  8, 13, 69, 11, 20,  2, 20, 11, 11, 20, 18, 69,
         22,  8, 19,  7, 69,  5, 14, 17, 19, 20]))

In [8]:
# Step size for the Hebbian weight update.
# Too high and the weights may explode; too low and the model may not learn.
learning_rate = 0.005

# Sliding window of recent rewards used as a baseline (starts with a single 0)
n_rewards = 100
last_n_rewards = [0]
last_n_reward_avg = 0
def train(line_tensor):
    """Run one reward-modulated Hebbian training step on a single sequence.

    Every index except the last is fed through ``rnn`` one character at a
    time; the last index is the target for the final prediction. The loss is
    converted into a reward, compared against the running average of recent
    rewards, and the difference scales the Hebbian weight update.

    Args:
        line_tensor: 1-D integer tensor of character indices with at least
            two elements (context followed by the target).

    Returns:
        ``(output, loss)``: the model's log-probabilities for the final step
        and the loss as a Python float.

    Raises:
        ValueError: if ``line_tensor`` has fewer than two elements, so there
            is no context to run the model on.
    """
    n_steps = line_tensor.size()[0] - 1
    if n_steps < 1:
        raise ValueError("train() needs a sequence of at least 2 character indices")

    hidden = rnn.initHidden()
    rnn.zero_grad()

    # One-hot encode the whole context once instead of once per step
    context = torch.nn.functional.one_hot(line_tensor[:-1], num_classes=n_characters).type(torch.float)
    for step in range(n_steps):
        output, hidden = rnn(context[step].unsqueeze(0), hidden)

    loss = criterion(output, line_tensor[-1].unsqueeze(0))
    loss_value = loss.item()

    # Map the non-negative loss to a reward in (0, 1]; lower loss, higher reward
    reward = 1 / (1 + loss_value)

    # Sliding-window baseline. The current reward is appended before the
    # average is taken, so the baseline includes it.
    last_n_rewards.append(reward)
    if len(last_n_rewards) > n_rewards:
        last_n_rewards.pop(0)
    baseline = sum(last_n_rewards) / len(last_n_rewards)
    reward_update = reward - baseline

    # Clamping runs before the update. clip_weights touches every tensor in
    # rnn.parameters(), so any imprints registered as parameters are bounded too.
    clip_weights(rnn, max_norm=0.5)  # Choose an appropriate max_norm value

    # Positive reward_update reinforces the last imprint, negative weakens it
    rnn.apply_imprints(reward_update, learning_rate)

    return output, loss_value


In [9]:
import time
import math

# Total training steps, and how often to report / record the loss
n_iters = 100_000
print_every, plot_every = 50, 10

# Running loss since the last recorded point, and the recorded averages
all_losses = []
current_loss = 0

def timeSince(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    ``since`` is a timestamp as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

start = time.time()

# The loop variable is ``iteration`` rather than ``iter``: it lives in the
# notebook's global namespace, where ``iter`` would shadow the built-in of the
# same name for every later cell.
for iteration in range(1, n_iters + 1):
    sequence, line_tensor = randomTrainingExample()
    output, loss = train(line_tensor)
    current_loss += loss

    # Periodically report step, progress, elapsed time, loss, the sampled
    # text, and the predicted final character against the actual one
    if iteration % print_every == 0:
        # Most likely next character according to the model
        _, topi = output.topk(1, dim=1)
        predicted_char = text_dataset.idx_to_char[topi[0, 0].item()]
        target_char = sequence[-1]
        correct = '✓' if predicted_char == target_char else '✗ (%s)' % target_char
        print('%d %d%% (%s) %.4f %s / %s %s' % (iteration, iteration / n_iters * 100, timeSince(start), loss, sequence, predicted_char, correct))

    # Record the mean loss over the last ``plot_every`` steps for plotting
    if iteration % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

0.09627902347366801
0.0642453315029716
0.046083009601005476
0.036550338035416546
0.02995886365938452
0.028947107304090253
0.025325776341128065
0.019604064901092427
0.016987131277232753
0.019068274528918655
0.01714989206326009
0.016210976796676407
0.011914707214763759
0.010103861840715184
0.013494057699928813
0.009044784627591618
0.010443414075461094
0.01003859940260654
0.007475823462962189
0.006646326152155185
0.0068187698928294516
0.006504288753854337
0.008502961416139432
0.00827645120457024
0.005761683308846632
0.006197941165837134
0.005424402351804886
0.006019772539734541
0.008987760879618861
0.008878555404941646
0.0052438322897759015
0.005268968060503559
0.0059979155736480105
0.004507886615770423
0.0073535852093905285
0.007149525889566993
0.0038436214961371507
0.0032562955963362783
0.003003029399195445
0.005416813867095777
0.0027906366118663484
0.0033477079831411294
0.0065659748606855806
0.0030989440121326206
0.0035457218308979077
0.0033995680068113487
0.004308767790565271
0.003322

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# One point per recorded average loss, in recording order
loss_figure = plt.figure()
loss_figure.gca().plot(all_losses)