# Dependencies

In [13]:
import numpy as np
import matplotlib.pyplot as plt
import random

# Data Creation

In [14]:
def generate_sentence(min_len=0, max_len=20, pos=True):
    if pos:
        n = np.random.randint(np.ceil(min_len / 3), np.ceil(max_len / 3))
        sentence = n*"a" + n*"b" + n*"c"
        return sentence, len(sentence)
    else:
        n_0 = np.random.randint(0, max_len)
        n_1 = np.random.randint(0, max_len - n_0 + 1)
        n_2 = np.random.randint(min_len - n_0 - n_1, max_len - n_0 - n_1 + 1)
        sentence = n_0 * "a" + n_1 * "b" + n_2 * "c"
        return sentence, len(sentence)

def create_data(size=10000, balance=0.1, min_len=0, max_len=20):
    data = []
    sentence_lengths = []

    for i in range(int(size*balance)):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=True)
        data.append((sentence, 1))
        sentence_lengths.append(sentence_length)
    for i in range(int((size - (size*balance)))):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=False)
        data.append((sentence, 0))
        sentence_lengths.append(sentence_length)
    
    random.shuffle(data)
    average_length = sum(sentence_lengths) / len(sentence_lengths)
    return data, average_length

train_data, avg_sent_length_train = create_data()
test_data, avg_sent_length_test = create_data(size=1000, balance=0.5, min_len=21, max_len=40)

print(f"Train Data Sample:\n{train_data}")
print(f"Average Sentence Length:\n{avg_sent_length_train}")
print(f"Test Data Sample:\n{test_data}")
print(f"Average Sentence Length:\n{avg_sent_length_test}")


Train Data Sample:
[('aaaaaaaaaaaab', 0), ('abc', 1), ('aaaaaaabbbccccccccc', 0), ('aaaaaaaaaaaaaabb', 0), ('aabbcc', 1), ('aaaaaaaaaaaaaaaabbb', 0), ('aaaaaaaaaaaaaaabbbbb', 0), ('aaaaaaaabbccccc', 0), ('aaaaaaaaaaaaaabbbbbb', 0), ('aaaaaaaaaaaaabbbb', 0), ('aaaaaaaabbbbbbbbbbb', 0), ('aaaaaaaaaaaaab', 0), ('aabbbbbbbbbbbb', 0), ('aaaaaaaaaaaaaaaaab', 0), ('aaaaaaaaaaaabbbbb', 0), ('aaaaabbbbbccccc', 1), ('aaaabbbbbbbbbbb', 0), ('aaaaaaaaaaaaaaaaaaa', 0), ('aaaaaaaaaaabbbbbbbb', 0), ('aaaaaaaaaabb', 0), ('aaaaaaabbbbbbbbbbbbb', 0), ('aabbbbbccccccc', 0), ('aaaaaaaaaaabbbbbbbbb', 0), ('aaaaaaaaaaaabbbbb', 0), ('aaaaab', 0), ('aaaaaaaaaaaaabbbb', 0), ('aaaaaaaabbbccccc', 0), ('', 1), ('aaaaaaaaaaaaaaaaabbb', 0), ('aaaaabbbbbbbbbb', 0), ('aaaabbbbcccc', 1), ('aaaaaaaaaab', 0), ('ccccccccccccccc', 0), ('aabbbbbbcc', 0), ('aaabbbbbbbbbbbbbb', 0), ('aaaaabbbbbbbbbbb', 0), ('aabbbbbbbbbbbbbbbb', 0), ('abbbbbbbbbbbbbbbbbbb', 0), ('aaaaaaaaaaaabbbbbbbb', 0), ('aaaaaaaaaabbbb', 0), ('aabbcc', 1

# Part B: Recurrent Neural Networks

In [15]:
# Dependencies
import torch
import torch.nn as nn
import torch.optim as optim

# Encoding

In [16]:
# Encoding data
char_to_index = {'a':0, 'b':1, 'c':2}
index_to_char = {v: k for k, v in char_to_index.items()}
classes = len(char_to_index)

def one_hot_encode(char, num_classes):
    one_hot = [0.0] * num_classes
    one_hot[char_to_index[char] - 1] = 1.0
    return one_hot

def creat_tensors(data, max_l):
    X = []
    y = []

    for sent, label in data:
        encoded = [one_hot_encode(char, classes) for char in sent]
        X.append(encoded)
        y.append(label)

    padded_X = []
    for sent in X:
        padding = [[0]*classes] * (max_l - len(sent))
        padded_X.append(sent + padding)

    X = torch.tensor(padded_X, dtype=torch.long)
    y = torch.tensor(y, dtype=torch.float32)

    return X, y

X_train, y_train = creat_tensors(train_data, max_l=40)
X_test, y_test = creat_tensors(test_data, max_l=40)

X_train[1]

tensor([[0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

# Create torch dataset

In [5]:
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64
dataset = TensorDataset(X_train, y_train)
train_set, val_set = torch.utils.data.random_split(dataset, [9000, 1000])

testset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(testset, BATCH_SIZE, shuffle=True)

# Setup model

In [6]:
# Set device
if torch.cuda.is_available():
    device = 'cuda:0'
elif torch.backends.mps.is_available():
    device = 'mps:0'
else:
    device = 'cpu'
print('GPU State:', device)

GPU State: mps:0


# Training loop

### RNN

In [18]:
class ElmanRNN(nn.Module):
    def __init__(self, INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE) -> None:
        super(ElmanRNN, self).__init__()

        self.rnn = nn.RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, batch_first=True)
        self.classification= nn.Sequential(
            nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        output, _ = self.rnn(x)
        logits = self.classification(output)
        return torch.squeeze(logits)

In [19]:
INPUT_SIZE = X_train.shape[1]
HIDDEN_SIZE = 4
NUM_LAYERS = 2
OUTPUT_SIZE = 1
num_epochs = 50
lr=0.001

model = ElmanRNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(num_epochs):
    model.train()
    e_loss = 0    # Initial loss

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        for input in inputs:
            print(input)
            break
        
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        total_norm = 0
        optimizer.step()
        e_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {e_loss / len(train_loader):.4f}')


    # for batch_idx, (inputs, targets) in enumerate(train_loader):
    #     #inputs = inputs.unsqueeze(-1)  # Add input_size dimension
    #     # Forward pass
    #     outputs = model(inputs)
    #     #print(outputs)
    #     loss = criterion(outputs, targets)
        
    #     # Backward pass and optimization
    #     optimizer.zero_grad()
    #     loss.backward()
    #     total_norm = 0
    #     optimizer.step()
        
    #     epoch_loss += loss.item()
    
    # if (epoch + 1) % 10 == 0:
    #     print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')

# for param in model.parameters():
#     print(param)

tensor([0.1000, 0.1000, 0.1000, 0.5000, 0.5000, 0.5000, 1.0000, 1.0000, 1.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 1.0000, 1.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.5000, 0.5000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000

In [20]:
from torcheval.metrics import BinaryF1Score

def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    
    metric = BinaryF1Score()

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)

            print(f"Predictions: {pred[:5]}")
            print(f"Targets: {y[:5]}")

            test_loss += loss_fn(pred, y).item()
            correct += (torch.round(pred) == y).type(torch.float).sum().item()
            metric.update(pred, y)
            


    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, F1-score: {metric.compute()}, Avg loss: {test_loss:>8f} \n")
    
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


test_loop(test_loader, model, loss_fn)

Predictions: tensor([0.0417, 0.0417, 0.0417, 0.0417, 0.0417])
Targets: tensor([1., 1., 1., 1., 1.])
Predictions: tensor([0.0417, 0.0417, 0.0417, 0.0418, 0.0417])
Targets: tensor([0., 0., 1., 0., 1.])
Predictions: tensor([0.0417, 0.0417, 0.0418, 0.0417, 0.0417])
Targets: tensor([1., 1., 0., 0., 1.])
Predictions: tensor([0.0418, 0.0418, 0.0417, 0.0417, 0.0417])
Targets: tensor([0., 0., 1., 0., 0.])
Predictions: tensor([0.0418, 0.0418, 0.0417, 0.0418, 0.0417])
Targets: tensor([0., 0., 0., 0., 0.])
Predictions: tensor([0.0417, 0.0417, 0.0418, 0.0417, 0.0417])
Targets: tensor([1., 1., 0., 1., 1.])
Predictions: tensor([0.0417, 0.0417, 0.0417, 0.0437, 0.0418])
Targets: tensor([1., 1., 1., 0., 0.])
Predictions: tensor([0.0417, 0.0417, 0.0417, 0.0417, 0.0417])
Targets: tensor([1., 1., 1., 0., 1.])
Predictions: tensor([0.0417, 0.0417, 0.0418, 0.0417, 0.0418])
Targets: tensor([1., 1., 0., 1., 0.])
Predictions: tensor([0.0417, 0.0417, 0.0417, 0.0417, 0.0417])
Targets: tensor([1., 1., 1., 1., 0.])


In [10]:
# class ElmanRNN(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers, output_size) -> None:
#         super(ElmanRNN, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.input_size = 1
#         self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
#         self.hidden_to_output = nn.Linear(hidden_size, output_size)

#     def forward(self, x):
#         h0 = torch.zeros(1, 32, 128)
#         out, _ = self.rnn(x, h0)
#         out = self.hidden_to_output(out[:, -1, :])

In [11]:
input_size = 1
hidden_size = 128
num_layers = 1
num_classes = 2
num_epochs = range(20)

model = ElmanRNN(input_size, hidden_size, num_layers, num_classes)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in num_epochs:
    model.train()
    epoch_loss = 0

    for batch_idx, (inputs, targets) in enumerate(data_loader):
        inputs = inputs.unsqueeze(-1)  # Add input_size dimension
        targets = targets
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/100], Loss: {epoch_loss / len(data_loader):.4f}')

NameError: name 'data_loader' is not defined