In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
def generate_sentence(min_len=0, max_len=20, pos=True):
    if pos:
        n = np.random.randint(np.ceil(min_len / 3), np.ceil(max_len / 3))
        sentence = n*"a" + n*"b" + n*"c"
        return sentence, len(sentence)
    else:
        n_0 = np.random.randint(0, max_len)
        n_1 = np.random.randint(0, max_len - n_0 + 1)
        n_2 = np.random.randint(min_len - n_0 - n_1, max_len - n_0 - n_1 + 1)
        sentence = n_0 * "a" + n_1 * "b" + n_2 * "c"
        return sentence, len(sentence)

def create_data(size=10000, balance=0.1, min_len=0, max_len=20):
    data = []
    sentence_lengths = []

    for i in range(int(size*balance)):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=True)
        data.append((sentence, 1))
        sentence_lengths.append(sentence_length)
    for i in range(int((size - (size*balance)))):
        sentence, sentence_length = generate_sentence(min_len=min_len, max_len=max_len, pos=False)
        data.append((sentence, 0))
        sentence_lengths.append(sentence_length)
    
    random.shuffle(data)
    average_length = sum(sentence_lengths) / len(sentence_lengths)
    return data, average_length

train_data, avg_sent_length_train = create_data()
test_data, avg_sent_length_test = create_data(size=1000, balance=0.5, min_len=21, max_len=40)

print(f"Train Data Sample:\n{train_data}")
print(f"Average Sentence Length:\n{avg_sent_length_train}")
print(f"Test Data Sample:\n{test_data}")
print(f"Average Sentence Length:\n{avg_sent_length_test}")


Train Data Sample:
[('aaaaaaaaaaaaaaaaaaab', 0), ('aaaaaaaabbbbbbbbbbbb', 0), ('aaaaaaaaaa', 0), ('aaaaaaaaaaaaaabbb', 0), ('aaabbbbbbbbbbbbb', 0), ('aaaaaaaaaaaaaabbbbbb', 0), ('aaaaabbbbbccccc', 1), ('aaaaaabbcccc', 0), ('aaaaaaaaaaaaaaaaaaa', 0), ('aaaaaaaaaabbbbbbbbbb', 0), ('abc', 1), ('aaaaaaaaabb', 0), ('aaabbbccc', 1), ('aaaaaaaaaaaaaaaaaa', 0), ('aaaaaaaaaaaaaaaaaaa', 0), ('aaaaaaaaaaaaab', 0), ('aaabbbccc', 1), ('aaaaaaaabbbbccccc', 0), ('aaaaaaaaaaaaaaa', 0), ('aaaaaaaabcccccccc', 0), ('aaaaaaabbbbbbbbbbc', 0), ('aaaaaaaaaaabbbbbbb', 0), ('aaaaaaaaaabbbbbbb', 0), ('aaaaaabbbb', 0), ('aaabbbbbbbbbbbbbbbbc', 0), ('aaaaaaabbbbbbbbb', 0), ('aabbbccccc', 0), ('aaaaaaabbbbbbbbbbbb', 0), ('aaaaaaaaaaaaaaabbb', 0), ('bbbbbbbbbbbccccc', 0), ('aaaaaabbbbbbbbbbbb', 0), ('aaaaaaabcccc', 0), ('aaaaaaaaaaaaaaabb', 0), ('bbbbbbbbbbbbbbb', 0), ('aaaaaaaabbbbbb', 0), ('aaaaaaaaaaaaaaabbb', 0), ('aaaaaaaaabbbbbbbbb', 0), ('aaaabbbbcccc', 1), ('aaaaabbbbbbccccccccc', 0), ('aaaaaaaaaaaaaaabb', 

In [3]:
# Dependencies
import torch
import torch.nn as nn
import torch.optim as optim

In [4]:
# Encoding data
char_to_index = {'a':0.1, 'b':0.5, 'c':1}
index_to_char = {v: k for k, v in char_to_index.items()}

def creat_tensors(data, max_l):
    X = []
    y = []
    ml = 0

    for sent, label in data:
        X.append([char_to_index[char] for char in sent])
        y.append(label)

    # Padding to be able to convert to tensor
    X = [sent + [0] * (max_l - len(sent)) for sent in X]

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    return X, y

X_train, y_train = creat_tensors(train_data, max_l=40)
X_test, y_test = creat_tensors(test_data, max_l=40)

X_train[1]

tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000])

In [5]:
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64
dataset = TensorDataset(X_train, y_train)
train_set, val_set = torch.utils.data.random_split(dataset, [9000, 1000])

testset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(testset, BATCH_SIZE, shuffle=True)

In [6]:
# Set device
if torch.cuda.is_available():
    device = 'cuda:0'
elif torch.backends.mps.is_available():
    device = 'mps:0'
else:
    device = 'cpu'
print('GPU State:', device)

GPU State: cpu


In [7]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, hidden_depth) -> None:
        super().__init__()

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, hidden_depth, batch_first=True)
        self.classification= nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        logits = self.classification(output)
        return torch.squeeze(logits)

In [8]:
input_size = X_train.shape[1]
hidden_size = 4
hidden_depth = 2
num_epochs = 100
lr=0.01

model = LSTM(input_size, hidden_size, hidden_depth)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        #inputs = inputs.unsqueeze(-1)  # Add input_size dimension
        # Forward pass
        outputs = model(inputs)
        #print(outputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        
        total_norm = 0
        # for p in model.parameters():
        #     param_norm = p.grad.data.norm(2)
        #     total_norm += param_norm.item() ** 2
        # total_norm = total_norm ** (1. / 2)
        # print(f'Epoch {epoch+1}, Batch Gradient Norm: {total_norm}')
        
        optimizer.step()
        
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')

# for param in model.parameters():
#     print(param)

Epoch 1, Batch Gradient Norm: 0.5513927136695748
Epoch 1, Batch Gradient Norm: 0.5072556755191974
Epoch 1, Batch Gradient Norm: 0.44649838510476647
Epoch 1, Batch Gradient Norm: 0.4278291773600635
Epoch 1, Batch Gradient Norm: 0.49615726298939916
Epoch 1, Batch Gradient Norm: 0.44657978235435364
Epoch 1, Batch Gradient Norm: 0.41868560268454413
Epoch 1, Batch Gradient Norm: 0.4354486647668279
Epoch 1, Batch Gradient Norm: 0.36281434180016947
Epoch 1, Batch Gradient Norm: 0.3759090387870222
Epoch 1, Batch Gradient Norm: 0.3277412883750775
Epoch 1, Batch Gradient Norm: 0.46977064361645227
Epoch 1, Batch Gradient Norm: 0.3935097955773285
Epoch 1, Batch Gradient Norm: 0.3167299579269139
Epoch 1, Batch Gradient Norm: 0.4301164368906584
Epoch 1, Batch Gradient Norm: 0.338525079740752
Epoch 1, Batch Gradient Norm: 0.27828096177295547
Epoch 1, Batch Gradient Norm: 0.33912159198719505
Epoch 1, Batch Gradient Norm: 0.3318676277111095
Epoch 1, Batch Gradient Norm: 0.2686828764020433
Epoch 1, Batc

In [None]:
from torcheval.metrics import BinaryF1Score

def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    
    metric = BinaryF1Score()

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (torch.round(pred) == y).type(torch.float).sum().item()
            metric.update(pred, y)
            


    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, F1-score: {metric.compute()}, Avg loss: {test_loss:>8f} \n")
    
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


test_loop(test_loader, model, loss_fn)

RuntimeError: all elements of input should be between 0 and 1

In [79]:
sweep_config = {
    'method': 'random',
}

metric = {
    'name': 'loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

parameters_dict = {
    'hidden_size': {
        'values': [3, 5, 10, 20]
    },
    'hidden_depth': {
        'values': [1, 2, 3]
    },
    'lr': {
        'values': [1e-2, 1e-3, 1e-4]
    },
    'num_epochs': {
        'values': [100, 200, 300]
    },
}

sweep_config['parameters'] = parameters_dict

import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'hidden_depth': {'values': [1, 2, 3]},
                'hidden_size': {'values': [3, 5, 10, 20]},
                'lr': {'values': [0.01, 0.001, 0.0001]},
                'num_epochs': {'values': [100, 200, 300]}}}


In [11]:
import wandb
sweep_id = wandb.sweep(sweep_config, project='lstm-sweep')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: w8wwsdbh
Sweep URL: https://wandb.ai/jarlku/lstm-sweep/sweeps/w8wwsdbh


In [12]:

def train_wandb(config=None):
    with wandb.init(config=config):
        config = wandb.config
        
        train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(test_set, BATCH_SIZE, shuffle=True)
        
        model = LSTM(input_size, config.hidden_size, config.hidden_depth)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
        criterion = nn.BCELoss()
        

        for epoch in range(config.num_epochs):
            model.train()
            epoch_loss = 0

            for batch_idx, (inputs, targets) in enumerate(train_loader):
                #inputs = inputs.unsqueeze(-1)  # Add input_size dimension
                # Forward pass
                outputs = model(inputs)
                #print(outputs)
                loss = criterion(outputs, targets)
                
                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                epoch_loss += loss.item()
            
            wandb.log({'loss': epoch_loss / len(train_loader)})
            if (epoch + 1) % 10 == 0:
                print(f'Epoch [{epoch + 1}/{config.num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')
        
        metric = BinaryF1Score()
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            wandb.log({'val_loss': loss})
            metric.update(outputs, targets)
            wandb.log({'val_f1': metric.compute()})
            
            

wandb.agent(sweep_id, train, count=5)

In [13]:
#wandb.agent(sweep_id, train_wandb, count=5)

[34m[1mwandb[0m: Agent Starting Run: 1sxjv5zy with config:
[34m[1mwandb[0m: 	hidden_depth: 1
[34m[1mwandb[0m: 	hidden_size: 5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_epochs: 300
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjarlsoeren[0m ([33mjarlku[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch [10/300], Loss: 0.0980
Epoch [20/300], Loss: 0.0712
Epoch [30/300], Loss: 0.0514
Epoch [40/300], Loss: 0.0423
Epoch [50/300], Loss: 0.0408
Epoch [60/300], Loss: 0.0384
Epoch [70/300], Loss: 0.0374
Epoch [80/300], Loss: 0.0370
Epoch [90/300], Loss: 0.0368
Epoch [100/300], Loss: 0.0364
Epoch [110/300], Loss: 0.0356
Epoch [120/300], Loss: 0.0360
Epoch [130/300], Loss: 0.0361
Epoch [140/300], Loss: 0.0361
Epoch [150/300], Loss: 0.0360
Epoch [160/300], Loss: 0.0359
Epoch [170/300], Loss: 0.0357
Epoch [180/300], Loss: 0.0353
Epoch [190/300], Loss: 0.0357
Epoch [200/300], Loss: 0.0354
Epoch [210/300], Loss: 0.0352
Epoch [220/300], Loss: 0.0354
Epoch [230/300], Loss: 0.0351
Epoch [240/300], Loss: 0.0353
Epoch [250/300], Loss: 0.0357
Epoch [260/300], Loss: 0.0354
Epoch [270/300], Loss: 0.0355
Epoch [280/300], Loss: 0.0357
Epoch [290/300], Loss: 0.0356
Epoch [300/300], Loss: 0.0353


0,1
loss,█▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1,█▄▂▄▃▁▂▃▁▂▂▂▃▂▁▁
val_loss,▂▄▂▂▄▅▂▃█▁▂▁▁▄▇▁

0,1
loss,0.03527
val_f1,0.93333
val_loss,0.00069


[34m[1mwandb[0m: Agent Starting Run: g8qltld7 with config:
[34m[1mwandb[0m: 	hidden_depth: 1
[34m[1mwandb[0m: 	hidden_size: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_epochs: 200
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch [10/200], Loss: 0.0964
Epoch [20/200], Loss: 0.0565
Epoch [30/200], Loss: 0.0455
Epoch [40/200], Loss: 0.0420
Epoch [50/200], Loss: 0.0392
Epoch [60/200], Loss: 0.0386
Epoch [70/200], Loss: 0.0376
Epoch [80/200], Loss: 0.0375
Epoch [90/200], Loss: 0.0369
Epoch [100/200], Loss: 0.0371
Epoch [110/200], Loss: 0.0374
Epoch [120/200], Loss: 0.0366
Epoch [130/200], Loss: 0.0361
Epoch [140/200], Loss: 0.0356
Epoch [150/200], Loss: 0.0361
Epoch [160/200], Loss: 0.0359
Epoch [170/200], Loss: 0.0361
Epoch [180/200], Loss: 0.0364
Epoch [190/200], Loss: 0.0358
Epoch [200/200], Loss: 0.0358


0,1
loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1,▄█▂▁▆█▆▇█▇▇▆▇▇██
val_loss,▃▁▅▂▂▁▃▃▂█▁▃▅▃▂▂

0,1
loss,0.03585
val_f1,0.93333
val_loss,0.01406


[34m[1mwandb[0m: Agent Starting Run: lhies2lt with config:
[34m[1mwandb[0m: 	hidden_depth: 3
[34m[1mwandb[0m: 	hidden_size: 20
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_epochs: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch [10/100], Loss: 0.3371
Epoch [20/100], Loss: 0.3322
Epoch [30/100], Loss: 0.3186
Epoch [40/100], Loss: 0.2449
Epoch [50/100], Loss: 0.2100
Epoch [60/100], Loss: 0.1895
Epoch [70/100], Loss: 0.1810
Epoch [80/100], Loss: 0.1725
Epoch [90/100], Loss: 0.1650
Epoch [100/100], Loss: 0.1599


0,1
loss,█▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1,█▅▄▃▂▁▁▂▂▂▂▂▂▂▂▂
val_loss,▂▄▂▅▁▄▅▇▂▄█▆▁▂▅▂

0,1
loss,0.1599
val_f1,0.76344
val_loss,0.09416


[34m[1mwandb[0m: Agent Starting Run: ybpoev1m with config:
[34m[1mwandb[0m: 	hidden_depth: 1
[34m[1mwandb[0m: 	hidden_size: 20
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_epochs: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch [10/100], Loss: 0.0683
Epoch [20/100], Loss: 0.0476
Epoch [30/100], Loss: 0.0407
Epoch [40/100], Loss: 0.0394
Epoch [50/100], Loss: 0.0380
Epoch [60/100], Loss: 0.0391
Epoch [70/100], Loss: 0.0375
Epoch [80/100], Loss: 0.0371
Epoch [90/100], Loss: 0.0365
Epoch [100/100], Loss: 0.0360


0,1
loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1,█▃▂▁▃▂▂▁▂▃▃▄▄▄▄▄
val_loss,▂▄▄▄▁▅▃█▁▃▁▁▃▂▂▂

0,1
loss,0.03602
val_f1,0.93333
val_loss,0.01747


[34m[1mwandb[0m: Agent Starting Run: qmwhwqa8 with config:
[34m[1mwandb[0m: 	hidden_depth: 1
[34m[1mwandb[0m: 	hidden_size: 20
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_epochs: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch [10/100], Loss: 0.3285
Epoch [20/100], Loss: 0.3082
Epoch [30/100], Loss: 0.2792
Epoch [40/100], Loss: 0.2593
Epoch [50/100], Loss: 0.2458
Epoch [60/100], Loss: 0.2327
Epoch [70/100], Loss: 0.2183
Epoch [80/100], Loss: 0.2041
Epoch [90/100], Loss: 0.1924
Epoch [100/100], Loss: 0.1802


0,1
loss,█▅▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_f1,▆▄█▅▄▃▁▂▂▃▃▂▂▁▁▁
val_loss,▅▃▅▄▇▃▇▂▆█▃▅▅▆▂▁

0,1
loss,0.18016
val_f1,0.54545
val_loss,0.09371
