In [90]:
from RNN import load_imdb
import torch
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)

Vraag 1

In [3]:
def pad_and_convert_to_tensor(batch_sequences, batch_labels, w2i):
    """Pad a batch of token-index sequences to equal length and tensorise it.

    Args:
        batch_sequences: list of sequences of integer token indices.
        batch_labels: list of numeric labels, one per sequence.
        w2i: word-to-index mapping. When it contains a '.pad' entry, that
            index is used as the padding value; otherwise 0 is used.
            NOTE(review): assumes the vocabulary names its padding token
            '.pad' -- confirm against load_imdb.

    Returns:
        A tuple (padded_sequences, labels_tensor): a long tensor of shape
        (batch, longest_sequence) and a float32 tensor with one label per
        sequence.
    """
    # Fall back to 0 (the previously hard-coded value) when no pad entry exists.
    pad_index = w2i.get('.pad', 0) if w2i else 0

    # Pad every sequence on the right up to the longest sequence in the batch.
    token_tensors = [torch.tensor(seq, dtype=torch.long) for seq in batch_sequences]
    padded_sequences = pad_sequence(token_tensors, batch_first=True, padding_value=pad_index)
    labels_tensor = torch.tensor(batch_labels, dtype=torch.float32)
    return padded_sequences, labels_tensor

# Batch size and the size of the training subset that is batched below.
batch_size = 32
num_train_samples = 500  # set to len(x_train) to batch the complete training set
all_padded_batches_x = []
all_tensor_batches_y = []

# Walk over the training subset in steps of batch_size.
# Each batch is padded to the length of its own longest sentence.
# The slice end is not clamped to num_train_samples, so the final batch
# (starting at index 480) is still a full batch as long as x_train holds at
# least 512 sentences: the loop yields 16 batches covering 512 sentences.
for i in range(0, num_train_samples, batch_size):
    batch_x = x_train[i:i+batch_size]
    batch_y = y_train[i:i+batch_size]

    padded_batch_x, tensor_batch_y = pad_and_convert_to_tensor(batch_x, batch_y, w2i)
    all_padded_batches_x.append(padded_batch_x)
    all_tensor_batches_y.append(tensor_batch_y)

Vraag 2

In [6]:
##WITHOUT LOSS
def simple_seq2seq_model(vocab_size, emb_dim, hidden_dim, num_classes, input_batches):
    """Run a forward pass (no loss, no training) over a list of batches.

    The network is: embedding -> per-token linear -> ReLU -> max over the
    time dimension -> linear projection to num_classes.

    All layers are created once, before the loop, so every batch is
    processed by the same (randomly initialised) weights.

    Args:
        vocab_size: number of rows in the embedding table.
        emb_dim: size of each token embedding.
        hidden_dim: output size of the per-token linear layer.
        num_classes: size of the final output per sequence.
        input_batches: iterable of long tensors of shape (batch, time).
            The time dimension may differ between batches, but the batch
            dimension must be equal for all of them because the per-batch
            outputs are stacked.

    Returns:
        Tensor of shape (num_batches, batch, num_classes).
    """
    # Build every layer exactly once; creating them inside the loop would give
    # each batch its own freshly initialised network.
    embedding = nn.Embedding(vocab_size, emb_dim)
    linear1 = nn.Linear(emb_dim, hidden_dim)
    relu = nn.ReLU()
    global_max_pool = nn.AdaptiveMaxPool1d(1)
    linear2 = nn.Linear(hidden_dim, num_classes)

    outputs = []
    for input_tensor in input_batches:
        # (batch, time) -> (batch, time, emb_dim)
        embedded = embedding(input_tensor)

        # Per-token linear layer followed by ReLU: (batch, time, hidden_dim)
        activated = relu(linear1(embedded))

        # AdaptiveMaxPool1d pools over the last axis, so move time there:
        # (batch, hidden_dim, time) -> (batch, hidden_dim, 1) -> (batch, hidden_dim)
        pooled = global_max_pool(activated.permute(0, 2, 1)).squeeze(dim=2)

        # Final projection: (batch, num_classes)
        outputs.append(linear2(pooled))

    # Stack the per-batch outputs into a single tensor.
    return torch.stack(outputs)

# Settings for the forward-only example.
vocab_size = len(w2i)
emb_dim = hidden_dim = 300
num_classes = 2  # because binary classification

# Push every padded batch through the model and collect the stacked outputs.
all_outputs = simple_seq2seq_model(
    vocab_size=vocab_size,
    emb_dim=emb_dim,
    hidden_dim=hidden_dim,
    num_classes=num_classes,
    input_batches=all_padded_batches_x,
)

# Report the shape of the stacked result.
print(all_outputs.shape)



torch.Size([16, 32, 2])


Vraag 3

In [67]:
class SimpleSeq2SeqModel(nn.Module):
    """Sequence classifier: embedding, per-token linear + ReLU, max over time, linear output."""

    def __init__(self, vocab_size, emb_dim, hidden_dim, num_classes):
        """Create the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_dim: size of each token embedding.
            hidden_dim: output size of the per-token linear layer.
            num_classes: number of outputs per sequence.
        """
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, emb_dim)   # token index -> vector
        self.linear1 = nn.Linear(emb_dim, hidden_dim)        # applied to every token
        self.relu = nn.ReLU()
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)       # reduces the pooled axis to length 1
        self.linear2 = nn.Linear(hidden_dim, num_classes)    # sequence-level output

    def forward(self, x):
        """Map a (batch, time) tensor of token indices to (batch, num_classes) outputs."""
        # (batch, time) -> (batch, time, hidden_dim)
        token_features = self.relu(self.linear1(self.embedding(x)))

        # The pooling layer works on the last axis, so time is moved there
        # before pooling: (batch, hidden_dim, time) -> (batch, hidden_dim, 1).
        pooled = self.global_max_pool(token_features.permute(0, 2, 1))

        # Drop the length-1 axis and project to the output size.
        return self.linear2(pooled.squeeze(-1))

In [88]:
# Hyperparameters
vocab_size = len(w2i)
emb_dim = 300
hidden_dim = 300
num_classes = 1       # a single logit per sequence, compared against a 0/1 target
learning_rate = 0.01  # the value the optimizer actually used; previously it was hard-coded there
num_epochs = 10

model = SimpleSeq2SeqModel(vocab_size, emb_dim, hidden_dim, num_classes)

# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for input_batch, target_batch in zip(all_padded_batches_x, all_tensor_batches_y):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(input_batch)

        # Give the targets the same shape as the model output
        target_batch = target_batch.view_as(output)

        # Compute the loss
        loss = criterion(output, target_batch)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the total loss for the epoch
        total_loss += loss.item()

        # Accuracy bookkeeping does not need gradients.
        with torch.no_grad():
            predictions = torch.round(torch.sigmoid(output))
        correct_predictions += (predictions == target_batch).sum().item()
        total_samples += target_batch.size(0)

    # Accuracy and mean per-batch loss for the epoch
    accuracy = correct_predictions / total_samples
    avg_loss = total_loss / len(all_padded_batches_x)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

Epoch [1/10], Loss: 0.6978, Accuracy: 0.5625
Epoch [2/10], Loss: 0.6798, Accuracy: 0.5762
Epoch [3/10], Loss: 0.6603, Accuracy: 0.6016
Epoch [4/10], Loss: 0.6405, Accuracy: 0.6191
Epoch [5/10], Loss: 0.6199, Accuracy: 0.6582
Epoch [6/10], Loss: 0.5980, Accuracy: 0.6934
Epoch [7/10], Loss: 0.5750, Accuracy: 0.7305
Epoch [8/10], Loss: 0.5515, Accuracy: 0.7422
Epoch [9/10], Loss: 0.5285, Accuracy: 0.7676
Epoch [10/10], Loss: 0.5066, Accuracy: 0.7812


In [100]:
from RNN import load_imdb
from padconvertion import get_batches

import torch
import torch.nn as nn

import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

# All tensors and the model in this cell are placed on the CPU.
device = torch.device('cpu')

# Load the data again; this rebinds the names loaded in the first data cell.
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)

# Rebind x_train / y_train to the result of get_batches with batch size 32.
# NOTE(review): get_batches lives in the padconvertion module (not shown here);
# presumably it returns parallel collections of padded input tensors and label
# tensors -- confirm against that module.
x_train, y_train = get_batches(x_train, y_train, batch_size=32)

class SimpleSeq2SeqModel(nn.Module):
    """Sequence classifier: embedding, per-token linear + ReLU, max over time, linear output."""

    def __init__(self, vocab_size, emb_dim, hidden_dim, num_classes):
        """Create the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_dim: size of each token embedding.
            hidden_dim: output size of the per-token linear layer.
            num_classes: number of outputs per sequence.
        """
        super(SimpleSeq2SeqModel, self).__init__()

        # 1) Embedding layer
        self.embedding = nn.Embedding(vocab_size, emb_dim)

        # 2) Linear layer applied to each token
        self.linear1 = nn.Linear(emb_dim, hidden_dim)

        # 3) ReLU activation
        self.relu = nn.ReLU()

        # 4) Global max pool along the time dimension
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)

        # 5) Linear layer
        self.linear2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, hidden=None):
        """Map a (batch, time) tensor of token indices to (batch, num_classes) outputs.

        Args:
            x: long tensor of token indices, shape (batch, time).
            hidden: accepted only so existing callers that pass a second
                argument keep working; this model has no recurrent state
                and ignores the value.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # 1) Embedding layer: (batch, time, emb_dim)
        embedded = self.embedding(x)

        # 2) Linear layer applied to each token: (batch, time, hidden_dim)
        linear_output = self.linear1(embedded)

        # 3) ReLU activation
        relu_output = self.relu(linear_output)

        # 4) Global max pool along the time dimension. The pooling layer works
        #    on the last axis, so time is moved there first.
        pooled_output = self.global_max_pool(relu_output.permute(0, 2, 1))

        # 5) Linear layer on the pooled features: (batch, num_classes)
        linear_output_final = self.linear2(pooled_output.squeeze(dim=2))

        return linear_output_final

# Hyperparameters
learning_rate = 0.001
num_epochs = 5
vocab_size = len(w2i)
emb_dim = 300
hidden_dim = 300

# One output per class; the loss below is cross-entropy over these outputs.
model = SimpleSeq2SeqModel(vocab_size, emb_dim, hidden_dim, numcls)
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Pair every input batch with its label batch once, so the pairs can be
# iterated again in each epoch and their count is known for the average loss.
train_dataset = list(zip(x_train, y_train))

# Training loop
loss_list = []   # loss of every individual batch, across all epochs
hidden = None    # passed through to forward(), which does not use it
for epoch in range(num_epochs):
    total_loss = 0.0
    for input_batch, target_batch in tqdm(train_dataset):
        inputs = input_batch.to(device)
        # cross_entropy expects integer class indices as targets
        targets = target_batch.to(device).long()

        optimizer.zero_grad()

        output = model(inputs, hidden)

        loss = F.cross_entropy(output, targets)

        # A new graph is built on every forward pass, so the old one does not
        # have to be retained after the backward pass.
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        loss_list.append(batch_loss)

    # Print average loss for the epoch
    average_loss = total_loss / len(train_dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}')

100%|██████████| 625/625 [03:02<00:00,  3.42it/s]


Epoch [1/5], Loss: 0.3483


100%|██████████| 625/625 [03:49<00:00,  2.73it/s]


Epoch [2/5], Loss: 0.2184


100%|██████████| 625/625 [04:18<00:00,  2.42it/s]


Epoch [3/5], Loss: 0.1433


 12%|█▏        | 73/625 [00:27<03:49,  2.40it/s]