In [6]:
import torch
import argparse
import numpy as np
from pathlib import Path

from src.dataset import ParityPredictionDataset, HiddenParityPrediction
from src.model import TinyModel, TinyTransformer
from src.train import train_model
from src.plot import (
    plot_losses,
    plot_accuracies,
    plot_line_with_label,
    plot_list_of_lines_and_labels,
)
from src.common import get_accuracy_on_dataset

# Hyperparameters shared by the cells below.
weight_decay = 1e-2
learning_rate = 1e-1
batch_size = 32
hidden_size = 1000
number_samples = 770
epochs = 400

# Replicability: seed every RNG the notebook draws from. NumPy alone is not
# enough because torch.utils.data.random_split, the nn.init weight
# initialisers and dropout all use torch's global generator.
np.random.seed(0)
torch.manual_seed(0)

In [2]:
# Create the full dataset; it is split into training and validation below.
entire_dataset = HiddenParityPrediction(
    num_samples=number_samples,
    sequence_length=40,
    k=3,
)

# Hold out 10% of the samples for validation
# (693 training / 77 validation when number_samples = 770).
train_size = int(0.90 * number_samples)
val_size = number_samples - train_size

# A dedicated, explicitly seeded generator keeps the split identical no matter
# how much of torch's global RNG earlier cells have consumed, so re-running
# this cell (or running cells out of order) cannot reshuffle the split.
training_dataset, validation_dataset = torch.utils.data.random_split(
    entire_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(0),
)

print(f"Training dataset size: {len(training_dataset)}")
print(f"Validation dataset size: {len(validation_dataset)}")

Training dataset size: 693
Validation dataset size: 77


In [5]:
# Create the model
import torch
import torch.nn as nn
import math

class TransformerModel(nn.Module):
    """Transformer encoder followed by a two-layer MLP head.

    The whole pipeline is batch-first: the encoder output is flattened from
    ``(batch, input_size, hidden_size)`` to ``(batch, input_size * hidden_size)``
    before the head, so the encoder and the positional encoding are both
    configured with ``batch_first=True``. (With the PyTorch default of
    ``batch_first=False`` attention would be computed across the batch
    dimension, mixing unrelated samples.)

    Args:
        input_size: Sequence length, i.e. number of tokens per sample.
        hidden_size: Transformer model dimension (``d_model``) and width of
            the hidden layer of the head.
        output_size: Number of output units.
        nhead: Number of attention heads; must divide ``hidden_size``.
        nlayers: Number of stacked encoder layers.
    """

    def __init__(self, input_size, hidden_size, output_size, nhead, nlayers):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Lifts each scalar token of a raw (batch, input_size) input to
        # hidden_size features; only used when forward() receives 2-D input.
        self.input_proj = nn.Linear(1, self.hidden_size)
        self.pos_encoder = PositionalEncoding(self.hidden_size, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=self.hidden_size,
                nhead=nhead,
                batch_first=True,
            ),
            num_layers=nlayers,
        )
        self.fc1 = nn.Linear(self.input_size * self.hidden_size, self.hidden_size)
        self.fc2 = nn.Linear(self.hidden_size, self.output_size, bias=False)

        # Initialise weights
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, src):
        """Run the model on a batch.

        Args:
            src: Either ``(batch, input_size)`` raw scalar tokens, which are
                first projected to ``hidden_size`` features each, or an
                already embedded ``(batch, input_size, hidden_size)`` tensor.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        if src.dim() == 2:
            # (batch, seq) -> (batch, seq, 1) -> (batch, seq, hidden_size).
            # Cast so integer-valued inputs can go through the Linear layer.
            tokens = src.unsqueeze(-1).to(self.input_proj.weight.dtype)
            src = self.input_proj(tokens)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = output.flatten(start_dim=1)
        output = torch.relu(self.fc1(output))
        output = self.fc2(output)
        return output


class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a sequence, then dropout.

    Args:
        d_model: Feature dimension of the input.
        dropout: Dropout probability applied after adding the encoding.
        max_len: Longest sequence length supported.
        batch_first: If ``True`` the input is ``(batch, seq, d_model)``;
            otherwise ``(seq, batch, d_model)`` (the default, unchanged from
            the original behaviour).
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False):
        super().__init__()
        self.batch_first = batch_first
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine columns,
        # so trim div_term to match instead of raising a shape error.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Stored as (max_len, 1, d_model), the same layout as before.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        """Return ``x`` plus the encoding for its positions, after dropout."""
        if self.batch_first:
            # (seq, 1, d_model) -> (1, seq, d_model), broadcast over the batch.
            x = x + self.pe[: x.size(1)].transpose(0, 1)
        else:
            x = x + self.pe[: x.size(0)]
        return self.dropout(x)

# Instantiate a single-head, single-layer transformer over 40-token inputs
# that emits one output value; the model width comes from `hidden_size`.
model = TransformerModel(
    input_size=40, hidden_size=hidden_size, output_size=1, nhead=1, nlayers=1
)

TypeError: TransformerModel.__init__() got an unexpected keyword argument 'hidden_size'

In [4]:
# Train `model` on the split datasets using the hyperparameters defined in the
# configuration cell. The first five returned values are kept; the sixth is
# discarded.
model, training_losses, validation_losses, training_accuracy, validation_accuracy, _ = train_model(
    model=model,
    training_dataset=training_dataset,
    validation_dataset=validation_dataset,
    epochs=epochs,
    batch_size=batch_size,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    loss_function_label="hinge",
    optimiser_function_label="sgd",
    progress_bar=True,
)

  0%|          | 0/400 [00:01<?, ?it/s]

Input size = torch.Size([32, 40]), target size = torch.Size([32])





AttributeError: 'NoneType' object has no attribute 'size'