# Dependencies

In [29]:
import numpy as np
import matplotlib.pyplot as plt
import random

# Data Creation

In [33]:
def generate_sentence(min_len=0, max_len=20, pos=True):
    if pos:
        n = np.random.randint(np.ceil(min_len / 3), np.ceil((max_len) / 3))
        sentence = n*"a" + n*"b" + n*"c"
        return sentence, len(sentence)
    else:
        n_0 = np.random.randint(0, max_len)
        n_1 = np.random.randint(0, max_len - n_0 + 1)
        n_2 = np.random.randint(min_len - n_0 - n_1, max_len - n_0 - n_1 + 1)
        sentence = n_0 * "a" + n_1 * "b" + n_2 * "c"
        return sentence, len(sentence)

def create_data(size=10000, balance=0.1, min_len=0, max_len=20, train=True):
    data = []
    sentence_lengths = []

    for i in range(int(size*balance)):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=True)
        while sentence == "aabbcc" and train:
            sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=True)
        if not train:
            sentence, sentence_length = "aabbcc", 6
        data.append((sentence, 1))
        sentence_lengths.append(sentence_length)
    for i in range(int((size - (size*balance)))):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=False)
        data.append((sentence, 0))
        sentence_lengths.append(sentence_length)
    
    random.shuffle(data)
    average_length = sum(sentence_lengths) / len(sentence_lengths)
    return data, average_length

train_data, avg_sent_length_train = create_data(size=1000, balance=0.5, min_len=0, max_len=20, train=True)
val_data, avg_sent_length_val = create_data(size=1000, balance=0.5, min_len=0, max_len=20, train=False)
test_data, avg_sent_length_test = create_data(size=1000, balance=0.5, min_len=21, max_len=40, train=True)

print(f"Train Data Sample:\n{train_data}")
print(f"Average Sentence Length:\n{avg_sent_length_train}")
print(f"Val Data Sample:\n{val_data}")
print(f"Average Sentence Length:\n{avg_sent_length_val}")
print(f"Test Data Sample:\n{test_data}")
print(f"Average Sentence Length:\n{avg_sent_length_test}")


Train Data Sample:
[('aaaaaabbbbbb', 0), ('', 1), ('aaaaabbbbbccccc', 1), ('aaaabbbbcccc', 1), ('aaaaaaaaaaaaabbbb', 0), ('aaaaaaaaaaaaaaaaabb', 0), ('aaaaaaaaaaaaaaaaab', 0), ('aaaaaaaaaaaaaabb', 0), ('aabccccccccc', 0), ('aaaabbbbcccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaaaaaaaaaaaabbbb', 0), ('bbbcccccccccccc', 0), ('aaaaabbbbbccccc', 1), ('aaaaaaaaabbbbbb', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaaaaaaaaabbbbbbb', 0), ('aaaaaaaaaabbbbbb', 0), ('abc', 1), ('aaaaaaabbbbbccccc', 0), ('aaaaaaaaaaabb', 0), ('aaabbbccc', 1), ('aaabbbccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaaaaaaaaaaaaaaaaab', 0), ('bbbbbbbbbbbb', 0), ('aaaabbbbcccc', 1), ('aaaabbbbbbbbbbbbbbb', 0), ('aaaaaaaaaaaaabbbbb', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaabbbbcccc', 1), ('aaabbbbbbbbcccccc', 0), ('aaaaaaaaaaaaaaaaabb', 0), ('abbbbbbbbbbbbbbbbbbb', 0), ('aaaabbbbcccc', 1), ('aaaaaaaaaaaabbbbbbbb', 0), ('aaabbbccc', 1), ('aaaaabbbbbccccc', 1), ('aaaaabbbbbccccc', 1), ('

# Part B: Recurrent Neural Networks

In [34]:
# Dependencies
import torch
import torch.nn as nn
import torch.optim as optim

# Encoding

In [35]:
# Encoding data
char_to_index = {'a':0, 'b':1, 'c':2}
classes = len(char_to_index)

def one_hot_encode(char, num_classes):
    one_hot = torch.zeros(classes, dtype=torch.float32)
    one_hot[char_to_index[char]] = 1.0
    return one_hot

def creat_tensors(data, max_l):
    X = []
    y = []

    for sent, label in data:
        if sent:
            encoded = [one_hot_encode(char, classes) for char in sent]
            encoded_tensor = torch.stack(encoded)
            X.append(encoded_tensor)
            y.append(label)

    padded_X = []
    for sent in X:
        padding = torch.zeros(max_l - sent.size(0), classes, dtype=torch.float32)
        padded_X.append(torch.cat([sent, padding], dim=0))

    X = torch.stack(padded_X)
    y = torch.tensor(y, dtype=torch.float32)

    return X, y

X_train, y_train = creat_tensors(train_data, max_l=40)
X_test, y_test = creat_tensors(test_data, max_l=40)

# X_train[2]

# Create torch dataset

In [36]:
from torch.utils.data import TensorDataset, DataLoader
import random

BATCH_SIZE = 64

# Assuming you have X_train and y_train from the original code
indices = list(range(len(X_train)))
random.shuffle(indices)

train_indices = indices[:900]
val_indices = indices[900:]

train_set = TensorDataset(X_train[train_indices], y_train[train_indices])
val_set = TensorDataset(X_train[val_indices], y_train[val_indices])

testset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(testset, BATCH_SIZE, shuffle=True)

# Setup model

In [37]:
# Set device
if torch.cuda.is_available():
    device = 'cuda:0'
elif torch.backends.mps.is_available():
    device = 'mps:0'
else:
    device = 'cpu'
print('GPU State:', device)

GPU State: mps:0


# Training loop

### RNN

In [40]:
class ElmanRNN(nn.Module):
    def __init__(self, INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE) -> None:
        super(ElmanRNN, self).__init__()

        self.rnn = nn.RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, batch_first=True)
        self.classification= nn.Sequential(
            nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        output, _ = self.rnn(x)
        logits = self.classification(output[:,-1,:])
        return logits

In [42]:
SEQUENCE_LENGTH = X_train.shape[1]
INPUT_SIZE = len(char_to_index)
HIDDEN_SIZE = 1
NUM_EPOCHS = 200
BATCH_SIZE = 64
OUTPUT_SIZE = 1
NUM_LAYERS = 1
learning_rate = 0.003

model = ElmanRNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)#.to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)
for EPOCH in range(NUM_EPOCHS):
    model.train()
    e_loss = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        targets = targets.reshape(-1, 1)    # Preparing data
        # inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)             # Getting output
        loss = criterion(outputs, targets)  # Calculating loss
        optimizer.zero_grad()               # Preparing backward propagation
        loss.backward()                     # Backward propagating
        optimizer.step()                    # Performing gradient descent
        e_loss += loss.item()
        
        if (batch_idx+1) % 10 == 0:
            print(f'EPOCH [{EPOCH+1}/{NUM_EPOCHS}], STEP [{batch_idx+1}/{n_total_steps}], LOSS {loss.item():.4f}, EPOCH_LOSS: {e_loss / len(train_loader):.4f}')

EPOCH [1/200], STEP [10/15], LOSS 0.6811, EPOCH_LOSS: 0.4689
EPOCH [2/200], STEP [10/15], LOSS 0.7081, EPOCH_LOSS: 0.4630
EPOCH [3/200], STEP [10/15], LOSS 0.6796, EPOCH_LOSS: 0.4635
EPOCH [4/200], STEP [10/15], LOSS 0.6837, EPOCH_LOSS: 0.4617
EPOCH [5/200], STEP [10/15], LOSS 0.6952, EPOCH_LOSS: 0.4588
EPOCH [6/200], STEP [10/15], LOSS 0.7007, EPOCH_LOSS: 0.4591
EPOCH [7/200], STEP [10/15], LOSS 0.6963, EPOCH_LOSS: 0.4601
EPOCH [8/200], STEP [10/15], LOSS 0.6862, EPOCH_LOSS: 0.4596
EPOCH [9/200], STEP [10/15], LOSS 0.6955, EPOCH_LOSS: 0.4602
EPOCH [10/200], STEP [10/15], LOSS 0.6890, EPOCH_LOSS: 0.4609
EPOCH [11/200], STEP [10/15], LOSS 0.6957, EPOCH_LOSS: 0.4613
EPOCH [12/200], STEP [10/15], LOSS 0.6858, EPOCH_LOSS: 0.4598
EPOCH [13/200], STEP [10/15], LOSS 0.6822, EPOCH_LOSS: 0.4595
EPOCH [14/200], STEP [10/15], LOSS 0.7058, EPOCH_LOSS: 0.4632
EPOCH [15/200], STEP [10/15], LOSS 0.6960, EPOCH_LOSS: 0.4627
EPOCH [16/200], STEP [10/15], LOSS 0.6888, EPOCH_LOSS: 0.4601
EPOCH [17/200], S

In [44]:
from torcheval.metrics import BinaryF1Score



def test_loop(data_loader):
    model.eval()
    metric = BinaryF1Score()
    total_correct = 0
    total_samples = 0
    test_loss = 0

    with torch.no_grad(): # Ensures that gradients are not calculated
        for inputs, targets in data_loader:
            targets = targets.reshape(-1, 1)
            # inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            predictions = (outputs > 0.5).float()
            total_correct += (predictions == targets).sum().item()
            total_samples += targets.size(0)
            metric.update(predictions.squeeze(), targets.squeeze())
    
    accuracy = total_correct / total_samples
    average_loss = test_loss / len(data_loader)
    f1 = metric.compute()

    print(f'Test Accuracy: {accuracy:.4f}')
    print(f'Average Test Loss: {average_loss:.4f}')
    print(f'F1 Score: {f1:.4f}')

test_loop(data_loader=val_loader)

Test Accuracy: 0.6071
Average Test Loss: 0.6818
F1 Score: 0.0000
