In [171]:
from typing import List
from sklearn.preprocessing import LabelEncoder
from sklearn import utils

import numpy as np
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F

In [3]:
# fizzbuzz(x):
#  | "fizzbuzz" if x % 15 == 0
#  | "fizz" if x % 3 == 0
#  | "buzz" if x % 5 == 0

# d := [0, 100]
# x := [101, 1024]
# y := [f(x)]

def get_binary(number: int) -> List[int]:
    """Given a int, returns its little-endian notation."""

    return [number >> i & 1 for i in range(10)]

def fizzbuzz(x):
    """The actual function that we are trying to learn."""

    if x % 15 == 0:
        return 'fizzbuzz'
    elif x % 3 == 0:
        return 'fizz'
    elif x % 5 == 0:
        return 'buzz'
    return ''

def build_dataset(lower: int, upper: int) -> List[List[int]]:
    """Builds fizzbuzz labels in the interval [lower, upper)."""

    X = list()
    y = list()
    for x in range(lower, upper):
        X.append(get_binary(x))
        y.append(fizzbuzz(x))

    return X, y

In [154]:
class FizzBuzz(nn.Module):
# architecture:
#    input -> FC(ReLU) -> FC(ReLU) -> Softmax

    def __init__(self, input_sz:int, h1: int, h2: int, output_sz: int) -> None:
        super(FizzBuzz, self).__init__()
        
        self.linear1 = nn.Linear(input_sz, h1)
        self.linear2 = nn.Linear(h1, h2)
        self.projection = nn.Linear(h2, output_sz)

    def forward(self, inputs):
        x = F.relu(self.linear1(inputs))
        x = F.relu(self.linear2(x))
        x = self.projection(x)

        return x

In [173]:
encoder = LabelEncoder()

X, y = build_dataset(101, 1024)
y = encoder.fit_transform(y)

model = FizzBuzz(10, 25, 25, 4)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()

EPOCHS = 1
n_samples = len(X)
BATCH_SIZE = 32

ceil = lambda x: int(np.ceil(x))

X = torch.tensor(X, dtype=torch.float32)
y = torch.from_numpy(y)

for epoch in range(5000):
    epoch_loss = 0
    
    permutations = torch.randperm(n_samples)
    X = X[permutations]
    y = y[permutations]

    n_batches = ceil(n_samples / BATCH_SIZE)
    for batch_no in range(n_batches):
        lower = batch_no * BATCH_SIZE
        upper = min(n_samples, (batch_no + 1) * BATCH_SIZE)

        x_in = X[lower:upper]
        y_in = y[lower:upper]
        
        model.zero_grad()
        class_scores = model(x_in)
        loss = criterion(class_scores, y_in)
        
        loss.backward()
        optimizer.step()
    
        epoch_loss += loss.item()

    if epoch % 100 == 0:
        print('#%4d epoch loss: %4.4f' % (epoch, epoch_loss))

#   0 epoch loss: 39.6974
# 100 epoch loss: 32.9657
# 200 epoch loss: 32.9005
# 300 epoch loss: 32.8882
# 400 epoch loss: 32.8670
# 500 epoch loss: 32.8174
# 600 epoch loss: 32.6350
# 700 epoch loss: 32.3026
# 800 epoch loss: 31.6756
# 900 epoch loss: 30.3131
#1000 epoch loss: 28.2432
#1100 epoch loss: 25.2926
#1200 epoch loss: 21.1403
#1300 epoch loss: 17.7151
#1400 epoch loss: 14.5922
#1500 epoch loss: 12.1253
#1600 epoch loss: 9.9792
#1700 epoch loss: 8.3403
#1800 epoch loss: 7.0794
#1900 epoch loss: 6.0335
#2000 epoch loss: 5.1766
#2100 epoch loss: 4.5295
#2200 epoch loss: 3.9658
#2300 epoch loss: 3.6322
#2400 epoch loss: 2.9890
#2500 epoch loss: 2.7784
#2600 epoch loss: 2.6345
#2700 epoch loss: 2.3683
#2800 epoch loss: 2.3767
#2900 epoch loss: 1.9410
#3000 epoch loss: 1.8338
#3100 epoch loss: 1.7681
#3200 epoch loss: 1.4803
#3300 epoch loss: 1.4544
#3400 epoch loss: 1.2793
#3500 epoch loss: 1.2542
#3600 epoch loss: 1.1661
#3700 epoch loss: 1.0730
#3800 epoch loss: 0.9201
#3900 epo

In [176]:
from sklearn.metrics import confusion_matrix
confusion_matrix(model(X).argmax(-1), y)

array([[493,   0,   1,   0],
       [  0, 122,   0,   0],
       [  0,   0, 245,   0],
       [  0,   0,   0,  62]])