In [54]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [55]:
def generate_sentence(max_len=20, pos=True):
    if pos:
        n = np.random.randint(np.ceil(max_len / 3))
        sentence = n*"a" + n*"b" + n*"c"
        return sentence, len(sentence)
    else:
        n_0 = np.random.randint(0, max_len)
        n_1 = np.random.randint(0, max_len - n_0 + 1)
        n_2 = np.random.randint(0, max_len - n_0 - n_1 + 1)
        sentence = n_0 * "a" + n_1 * "b" + n_2 * "c"
        return sentence, len(sentence)

def create_data(size=10000, balance=0.5):
    data = []
    sentence_lengths = []

    for i in range(int(size*balance)):
        sentence, sentence_length = generate_sentence(pos=True)
        data.append((sentence, 1))
        sentence_lengths.append(sentence_length)
    for i in range(int((size - (size*balance)))):
        sentence, sentence_length = generate_sentence(pos=False)
        data.append((sentence, 0))
        sentence_lengths.append(sentence_length)
    
    random.shuffle(data)
    average_length = sum(sentence_lengths) / len(sentence_lengths)
    
    return data, average_length

data, avg_sent_length = create_data()
print(f"Data Sample:\n{data}")
print(f"Average Sentence Length:\n{avg_sent_length}")


Data Sample:
[('aabbcc', 1), ('aabbcc', 1), ('aabbcc', 1), ('aaaaaaaaaaaaaaaaaabb', 0), ('aaaaaaaaaaaaaaaabbbc', 0), ('aaaaaaaaaaaaaaaaabbb', 0), ('aaabbbccc', 1), ('aaaaabbbbbbbbbbbbbc', 0), ('aaaaabbbbbbbbbbbbbbb', 0), ('aaaaaaaaaaaaaaabbbbb', 0), ('aaabbbbcccc', 0), ('aaaaabbbbbccccc', 1), ('', 1), ('aaabbbccc', 1), ('abbbbbbbbccccccccc', 0), ('aaaabbbbcccc', 1), ('aaaaabbbbbccccc', 1), ('', 1), ('aaaabbbbcccc', 1), ('aaaccccccccccccc', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaaaaaaaaaaaaabbbbb', 0), ('', 1), ('aaaaaaaaaaaaabbbbbb', 0), ('aaaaaabbbbbbbbbcccc', 0), ('aaaaabbbbbccccc', 1), ('aabbcc', 1), ('aaaaabbbbbccccc', 1), ('aaaaabbbbbccccc', 1), ('aaabbbccc', 1), ('aaaaaaaaabbbbbbb', 0), ('aaaaaaaaaaaaaaaaaabb', 0), ('aaaaaaaaaaaaabcccc', 0), ('bcccccccccccccccc', 0), ('aaaabbbbcccc', 1), ('bbbbb', 0), ('aabbbccccccc', 0), ('aabbcc', 1), ('aaaaaaaaaaabbbbbbccc', 0), ('aaaaaaaaaaaaaaaaaaab', 0), ('aaaaabbbbbccccc', 1), ('aaaaaaaaaaaaaabbbccc', 0), ('aaaaaaaa

In [56]:
# Dependencies
import torch
import torch.nn as nn
import torch.optim as optim

In [57]:
# Encoding data
char_to_index = {'a':0, 'b':0.5, 'c':1}
index_to_char = {v: k for k, v in char_to_index.items()}
max_l = 20

def creat_tensors():
    X = []
    y = []
    ml = 0

    for sent, label in data:
        X.append([char_to_index[char] for char in sent])
        y.append(label)

    # Padding to be able to convert to tensor
    X = [sent + [0] * (max_l - len(sent)) for sent in X]

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    return X, y

X_train, y_train = creat_tensors()

X_train[1]

tensor([0.0000, 0.0000, 0.5000, 0.5000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000])

In [58]:
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64
dataset = TensorDataset(X_train, y_train)
train_set, val_set, test_set = torch.utils.data.random_split(dataset, [8000, 1000, 1000])

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, BATCH_SIZE, shuffle=True)

In [59]:
# Set device
if torch.cuda.is_available():
    device = 'cuda:0'
elif torch.backends.mps.is_available():
    device = 'mps:0'
else:
    device = 'cpu'
print('GPU State:', device)

GPU State: cpu


In [65]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim) -> None:
        super().__init__()

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, 2, batch_first=True)
        self.classification= nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        logits = self.classification(output)
        return torch.squeeze(logits)

In [66]:
input_size = X_train.shape[1]
hidden_size = 20
num_epochs = 100

model = LSTM(input_size, hidden_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        #inputs = inputs.unsqueeze(-1)  # Add input_size dimension
        # Forward pass
        outputs = model(inputs)
        #print(outputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/100], Loss: {epoch_loss / len(train_loader):.4f}')

Epoch [10/100], Loss: 0.1131
Epoch [20/100], Loss: 0.0696
Epoch [30/100], Loss: 0.0547
Epoch [40/100], Loss: 0.0520
Epoch [50/100], Loss: 0.0499
Epoch [60/100], Loss: 0.0488
Epoch [70/100], Loss: 0.0478
Epoch [80/100], Loss: 0.0471
Epoch [90/100], Loss: 0.0474
Epoch [100/100], Loss: 0.0463


In [78]:
def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (torch.round(pred) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


test_loop(test_loader, model, loss_fn)

Test Error: 
 Accuracy: 99.0%, Avg loss: 0.030919 

