In [3]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [4]:
def generate_sentence(max_len=20, pos=True):
    if pos:
        n = np.random.randint(np.ceil(max_len / 3))
        sentence = n*"a" + n*"b" + n*"c"
        return sentence, len(sentence)
    else:
        n_0 = np.random.randint(0, max_len)
        n_1 = np.random.randint(0, max_len - n_0 + 1)
        n_2 = np.random.randint(0, max_len - n_0 - n_1 + 1)
        sentence = n_0 * "a" + n_1 * "b" + n_2 * "c"
        return sentence, len(sentence)

def create_data(size=10000, balance=0.5):
    data = []
    sentence_lengths = []

    for i in range(int(size*balance)):
        sentence, sentence_length = generate_sentence(pos=True)
        data.append((sentence, 1))
        sentence_lengths.append(sentence_length)
    for i in range(int((size - (size*balance)))):
        sentence, sentence_length = generate_sentence(pos=False)
        data.append((sentence, 0))
        sentence_lengths.append(sentence_length)
    
    random.shuffle(data)
    average_length = sum(sentence_lengths) / len(sentence_lengths)
    
    return data, average_length

data, avg_sent_length = create_data()
print(f"Data Sample:\n{data}")
print(f"Average Sentence Length:\n{avg_sent_length}")


Data Sample:
[('aaaaaaaaaaaaaaabbcc', 0), ('aaaaabbbbbccccc', 1), ('aaaaaabbbbbbcccccc', 1), ('aaaaabbbbbccccc', 1), ('aaaabbbbcccc', 1), ('abbbbbbbbbbbbbbbbb', 0), ('aaaaaaaaaaaaaaaaabb', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaaabbbbbccccc', 1), ('aaaaaaaaaaaaaaaabbb', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaabbbbbbbbbbbbcc', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaabbbbbbbbbcccccc', 0), ('aaaaaaaaaabbbbccccc', 0), ('aaaaabbbbbbbbbbbbbbb', 0), ('aaaaabbbbbccccc', 1), ('aaaaaaaaaaaaabbbbbc', 0), ('abc', 1), ('aaabbbccc', 1), ('aaaaabbccccccccccc', 0), ('aaaaaaaabbbbccccc', 0), ('aaaaabbbbbccccc', 1), ('', 1), ('aaabcccc', 0), ('aaaabbbbcccc', 1), ('aaabbbccc', 1), ('aaaaaaaaaabbbbbbcc', 0), ('aaabbbccc', 1), ('abc', 1), ('aaaaaaaaaabbbbbccccc', 0), ('', 1), ('aaabbbccc', 1), ('aabbcc', 1), ('abc', 1), ('abc', 1), ('aaaaaaaaaaabbbbbbc', 0), ('aaaaaabbbbbbcccccc', 1), ('aaaaaabbbbbbbbbcccc', 0), ('aaaabbbbbbbbbbb', 0), ('bbbbbbbbbbbbbbbbbcc', 0), ('aaaabbbbcccc', 1), ('aaaaaaaaaaaaabccc', 0), ('aaa

In [5]:
# Dependencies
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
# Encoding data
char_to_index = {'a':ord('a'), 'b':ord('b'), 'c':ord('c')}
index_to_char = {v: k for k, v in char_to_index.items()}
max_l = 20

def creat_tensors():
    X = []
    y = []
    ml = 0

    for sent, label in data:
        X.append([char_to_index[char] for char in sent])
        y.append(label)

    # Padding to be able to convert to tensor
    X = [sent + [0] * (max_l - len(sent)) for sent in X]

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    return X, y

X_train, y_train = creat_tensors()

X_train

tensor([[97., 97., 97.,  ..., 99., 99.,  0.],
        [97., 97., 97.,  ...,  0.,  0.,  0.],
        [97., 97., 97.,  ..., 99.,  0.,  0.],
        ...,
        [97., 97., 97.,  ..., 99., 99., 99.],
        [97., 98., 99.,  ...,  0.,  0.,  0.],
        [97., 97., 97.,  ..., 99.,  0.,  0.]])

In [7]:
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train, y_train)
data_loader = DataLoader(dataset, 32, shuffle=True)

In [8]:
# Set device
if torch.cuda.is_available():
    device = 'cuda:0'
elif torch.backends.mps.is_available():
    device = 'mps:0'
else:
    device = 'cpu'
print('GPU State:', device)

GPU State: cuda:0


In [31]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim) -> None:
        super().__init__()

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, 2)
        self.classification= nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        logits = self.classification(output)
        return torch.squeeze(logits)

In [32]:
input_size = X_train.shape[1]
hidden_size = 128
num_epochs = 100

model = LSTM(input_size, hidden_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_idx, (inputs, targets) in enumerate(data_loader):
        #inputs = inputs.unsqueeze(-1)  # Add input_size dimension
        # Forward pass
        outputs = model(inputs)
        #print(outputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/100], Loss: {epoch_loss / len(data_loader):.4f}')

Epoch [10/100], Loss: 0.3163
Epoch [20/100], Loss: 0.3147
Epoch [30/100], Loss: 0.3412
Epoch [40/100], Loss: 0.3396
Epoch [50/100], Loss: 0.3282
Epoch [60/100], Loss: 0.3992
Epoch [70/100], Loss: 0.3862
Epoch [80/100], Loss: 0.4116
Epoch [90/100], Loss: 0.3928
Epoch [100/100], Loss: 0.3942
