In [1]:
from RNN import load_imdb
import torch
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)

Vraag 1

In [3]:
def pad_and_convert_to_tensor(batch_sequences, batch_labels, w2i):
    # Pad sequences to the maximum length
    padded_sequences = pad_sequence([torch.tensor(seq, dtype=torch.long) for seq in batch_sequences], batch_first=True, padding_value=0)
    labels_tensor = torch.tensor(batch_labels, dtype=torch.float32)
    return padded_sequences, labels_tensor

#Define batch_size
batch_size = 32 
all_padded_batches_x = []
all_tensor_batches_y = []

#Select batches of sentences in the trainset
#Define the length of the longest sentence in that batch
#Add zero's to all sentences to reach the same length of the longest sentence
#In the end all_padded_batches_x contains 625 batches of size 32. In total 20000 sentences.
for i in range(0, 500, batch_size):
    batch_x = x_train[i:i+batch_size]
    batch_y = y_train[i:i+batch_size]
    
    padded_batch_x, tensor_batch_y = pad_and_convert_to_tensor(batch_x, batch_y, w2i)
    all_padded_batches_x.append(padded_batch_x)
    all_tensor_batches_y.append(tensor_batch_y)

#print(len(all_padded_batches_x[200][1]))

Vraag 2

In [4]:
##WITHOUT LOSS
def simple_seq2seq_model(vocab_size, emb_dim, hidden_dim, num_classes, input_batches):
    # 1) Embedding layer
    embedding = nn.Embedding(vocab_size, emb_dim)

    # List to store outputs for each batch
    outputs = []

    for input_tensor in input_batches:
        embedded = embedding(input_tensor)

        # 2) Linear layer applied to each token
        linear1 = nn.Linear(emb_dim, hidden_dim)
        linear_output = linear1(embedded)

        # 3) ReLU activation
        relu = nn.ReLU()
        relu_output = relu(linear_output)

        # 4) Global max pool along the time dimension
        global_max_pool = nn.AdaptiveMaxPool1d(1)
        pooled_output = global_max_pool(relu_output.permute(0, 2, 1))

        # 5) Linear layer
        linear2 = nn.Linear(hidden_dim, num_classes)
        linear_output_final = linear2(pooled_output.squeeze(dim=2))

        outputs.append(linear_output_final)

    # Stack outputs into a single tensor
    outputs_tensor = torch.stack(outputs)

    return outputs_tensor

# Example usage
vocab_size = len(w2i)
emb_dim = 300       
hidden_dim = 300   
num_classes = 1  # because binary classification

# Forward pass for all batches
all_outputs = simple_seq2seq_model(vocab_size, emb_dim, hidden_dim, num_classes, all_padded_batches_x)

# Print the overall output shape
print(all_outputs.shape)



torch.Size([16, 32, 1])


In [6]:
#WITH LOSS
def simple_seq2seq_model(vocab_size, emb_dim, hidden_dim, num_classes, input_batches, target_batches):

    outputs = []

    for input_tensor in input_batches:
         # 1) Embedding layer
        embedding = nn.Embedding(vocab_size, emb_dim)
        embedded = embedding(input_tensor)

        # 2) Linear layer applied to each token
        linear1 = nn.Linear(emb_dim, hidden_dim)
        linear_output = linear1(embedded)

        # 3) ReLU activation
        relu = nn.ReLU()
        relu_output = relu(linear_output)

        # 4) Global max pool along the time dimension
        global_max_pool = nn.AdaptiveMaxPool1d(1)
        pooled_output = global_max_pool(relu_output.permute(0, 2, 1))

        # 5) Linear layer
        linear2 = nn.Linear(hidden_dim, num_classes)
        linear_output_final = linear2(pooled_output.squeeze(dim=2))

        outputs.append(linear_output_final)

    # Stack outputs into a single tensor
    outputs_tensor = torch.stack(outputs)

    # Sigmoid activation and BCE loss
    softmax = nn.Softmax(dim=1)
    predictions = softmax(outputs_tensor)
    target_tensor = torch.stack(target_batches).long()
    loss = F.cross_entropy(predictions, target_tensor)
    print("Loss:", loss.item())

    return outputs_tensor

# Example usage
vocab_size = len(w2i)
emb_dim = 300       
hidden_dim = 300   
num_classes = 1  # because binary classification

# Forward pass for all batches
all_outputs = simple_seq2seq_model(vocab_size, emb_dim, hidden_dim, num_classes, all_padded_batches_x, all_tensor_batches_y)

# Print the overall output shape
print(all_outputs.shape)



IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

Vraag 3

In [29]:
class SimpleSeq2SeqModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, num_classes):
        super(SimpleSeq2SeqModel, self).__init__()

        # 1) Embedding layer
        self.embedding = nn.Embedding(vocab_size, emb_dim)

        # 2) Linear layer applied to each token
        self.linear1 = nn.Linear(emb_dim, hidden_dim)

        # 3) ReLU activation
        self.relu = nn.ReLU()

        # 4) Global max pool along the time dimension
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)

        # 5) Linear layer
        self.linear2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        # Input x: (batch, time)

        # 1) Embedding layer
        embedded = self.embedding(x)

        # 2) Linear layer applied to each token
        linear_output = self.linear1(embedded)

        # 3) ReLU activation
        relu_output = self.relu(linear_output)

        # 4) Global max pool along the time dimension
        pooled_output = self.global_max_pool(relu_output.permute(0, 2, 1))

        # 5) Linear layer
        linear_output_final = self.linear2(pooled_output.squeeze(dim=2))

        return linear_output_final

In [31]:
# Define your hyperparameters
vocab_size = len(w2i)
emb_dim = 300
hidden_dim = 300
num_classes = 1
learning_rate = 0.001
num_epochs = 10
batch_size = 32

# Create an instance of your model
model = SimpleSeq2SeqModel(vocab_size, emb_dim, hidden_dim, num_classes)

# Define your optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
sigmoid = torch.nn.Sigmoid()

# Create DataLoader for training batches
train_dataset = [(x, y) for x, y in zip(all_padded_batches_x, all_tensor_batches_y)]
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0

    # Iterate over batches from the data loader
    for input_batch, target_batch in train_data_loader:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(input_batch)

        # Apply sigmoid activation
        predictions = sigmoid(output)

        # Calculate BCE loss
        target_tensor = target_batch.unsqueeze(1).float()
        loss = F.binary_cross_entropy(predictions, target_tensor)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate the total loss for monitoring
        total_loss += loss.item()

    # Print average loss for the epoch
    average_loss = total_loss / len(train_data_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}')



TypeError: __init__() takes 5 positional arguments but 7 were given