In [1]:
from typing import List

import numpy as np
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from sklearn.preprocessing import LabelEncoder
from sklearn import utils
from sklearn import metrics

In [2]:
# fizzbuzz(x):
#  | "fizzbuzz" if x % 15 == 0
#  | "fizz" if x % 3 == 0
#  | "buzz" if x % 5 == 0
#  | str(x) otherwise

# x := [0, 100]
# y := [f(x)]

def get_binary(number: int, nbits: int) -> List[int]:
    """Given a int, returns its little-endian notation."""

    return [number >> i & 1 for i in range(nbits)]


def fizzbuzz(x):
    """The actual function that we are trying to learn."""

    if x % 15 == 0:
        return 'fizzbuzz'
    elif x % 3 == 0:
        return 'fizz'
    elif x % 5 == 0:
        return 'buzz'
    return ''


def build_dataset(lower: int, upper: int, nbits: int) -> List[List[int]]:
    """Builds fizzbuzz labels in the interval [lower, upper)."""

    X = list()
    y = list()
    for x in range(lower, upper + 1):
        X.append(get_binary(x, nbits))
        y.append(fizzbuzz(x))

    return X, y

In [3]:
class FizzBuzz(nn.Module):
# architecture:
#    input -> FC(ReLU) -> FC(ReLU) -> Softmax

    def __init__(self, input_sz:int, h1: int, h2: int, output_sz: int) -> None:
        super(FizzBuzz, self).__init__()
        
        self.linear1 = nn.Linear(input_sz, h1)
        self.linear2 = nn.Linear(h1, h2)
        self.projection = nn.Linear(h2, output_sz)

    def forward(self, inputs):
        x = F.relu(self.linear1(inputs))
        x = F.relu(self.linear2(x))
        x = self.projection(x)

        return x

In [4]:
ceil = lambda x: int(np.ceil(x))

def trainer(model: nn.Module,
            X: torch.Tensor,
            y: torch.Tensor,
            n_epochs: int,
            batch_size: int,
            validation_frac: float,
            log_every: int
           ) -> None:
    """Helper function to train the model.
    :param model: The PyTorch model
    :param X: Tensors representing the sample features
    :param y: Tensors with categorical labels
    :param n_epochs
    :param batch_size
    :param validation_frac: Percentage of the samples used for validation
    only after shuffling
    :param log_every: Evaluate the model and log its metrics after how
    many epochs
    :return None
    """
    n_samples = X.shape[0]
    val_samples = ceil(n_samples * validation_frac)
    trn_samples = n_samples - val_samples
    
    n_batches = ceil(trn_samples / BATCH_SIZE)
    
    # first permute before separating the validation samples
    permutations = torch.randperm(n_samples)
    X = X[permutations]
    y = y[permutations]
    
    X_val = X[:val_samples]
    y_val = y[:val_samples]
    
    # the remaining are train samples
    X_trn = X[val_samples:]
    y_trn = y[val_samples:]
    
    for epoch in range(1, n_epochs + 1):
        epoch_loss = 0

        # ensuring that the models sees samples in different order / epoch
        permutations = torch.randperm(trn_samples)
        X_trn = X_trn[permutations]
        y_trn = y_trn[permutations]

        for batch_no in range(n_batches):            
            lower = batch_no * BATCH_SIZE
            upper = min(n_samples, (batch_no + 1) * BATCH_SIZE)

            x_in = X_trn[lower:upper]
            y_in = y_trn[lower:upper]

            model.zero_grad()
            class_scores = model(x_in)
            loss = criterion(class_scores, y_in)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        if epoch % log_every == 0:
            predictions = model(X_trn).argmax(-1)
            trn_accuracy = metrics.accuracy_score(predictions, y_trn)

            predictions = model(X_val).argmax(-1)
            val_accuracy = metrics.accuracy_score(predictions, y_val)
            print('Epoch %4d train loss: %4.4f train accuracy %4.4f valid. accuracy %4.4f' % 
                  (epoch, epoch_loss, trn_accuracy, val_accuracy))


# network hyperparams
HIDDEN_SIZE = 20
N_BITS = 10
N_LABELS = 4
EPOCHS = 20000
BATCH_SIZE = 32
LEARNING_RATE = 1e-2

# building the dataset
encoder = LabelEncoder()

X, y = build_dataset(101, 1024, N_BITS)
y = encoder.fit_transform(y)

# the model
model = FizzBuzz(N_BITS, HIDDEN_SIZE, HIDDEN_SIZE, N_LABELS)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# training
X = torch.tensor(X, dtype=torch.float32)
y = torch.from_numpy(y)
            
trainer(model, n_epochs=EPOCHS, batch_size=BATCH_SIZE, X=X, y=y, validation_frac=0.2, log_every=1000)

Epoch 1000 train loss: 26.0902 train accuracy 0.5413 valid. accuracy 0.4973
Epoch 2000 train loss: 15.4321 train accuracy 0.7564 valid. accuracy 0.6541
Epoch 3000 train loss: 3.1704 train accuracy 0.9689 valid. accuracy 0.8595
Epoch 4000 train loss: 1.1123 train accuracy 0.9878 valid. accuracy 0.9135
Epoch 5000 train loss: 0.4288 train accuracy 0.9986 valid. accuracy 0.9243
Epoch 6000 train loss: 0.2127 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 7000 train loss: 0.1500 train accuracy 1.0000 valid. accuracy 0.9297
Epoch 8000 train loss: 0.0924 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 9000 train loss: 0.0708 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 10000 train loss: 0.0551 train accuracy 1.0000 valid. accuracy 0.9297
Epoch 11000 train loss: 0.0459 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 12000 train loss: 0.0381 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 13000 train loss: 0.0324 train accuracy 1.0000 valid. accuracy 0.9243
Epoch 14000 train l

In [6]:
# test metrics 

X_tst, y_tst = build_dataset(1, 100, N_BITS)
X_tst = torch.tensor(X_tst, dtype=torch.float32)
y_tst = encoder.transform(y_tst)

model.eval()
y_hat = model(X_tst).argmax(-1)

acc = metrics.accuracy_score(y_tst, y_hat)
print('Test accyracy: ', acc)

Test accyracy:  0.79
