# Testing out the Forward-forward algo on Make_Moons' Toy Data

It doesn't seem to work all that well -- 

In [25]:
# Forward-forward Implementation in Torch
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader

from sklearn.datasets import make_moons, make_blobs


# Vars
device = "cuda" if torch.cuda.is_available() else "cpu"
random_seed = 42


# Util to Load in MNIST Data --> used in Hinton's example
def MNIST_loaders(train_batch_size=50000, test_batch_size=10000):

    # Standard Transformations to the MNIST Dataset
    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

    # Instantiate Loaders (can call these for randomized batches at train/inference time)
    train_loader = DataLoader(
        MNIST('./data/', train=True,
              download=True,
              transform=transform),
        batch_size=train_batch_size, shuffle=True)

    test_loader = DataLoader(
        MNIST('./data/', train=False,
              download=True,
              transform=transform),
        batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader

# Toy Dataset
def MOON_dataset(train_size=50000, test_size=10000):
    n_total = train_size + test_size
    x, y = make_moons(n_total, noise=0.0, random_state=random_seed)
    # x, y = make_blobs(n_samples=n_total, random_state=42) #not the traditional moon but reasonable to learn

    trainset = (torch.tensor(x[:train_size]), torch.tensor(y[:train_size]))
    testset =  (torch.tensor(x[-test_size:]), torch.tensor(y[-test_size:]))

    return trainset, testset


# Get Loaders for Datasets
# train_loader, test_loader = MNIST_loaders() 
train_loader, test_loader = MOON_dataset() 


In [26]:
# Add the Label information to X Data (mentioned in Hinton's paper, see Section 3.3)
def overlay_y_on_x(x, y):
    """
    x is a single training instance (MNIST Vec of len 784) and y is the scalar value representing the label 
    """
    x_ = x.clone()
    x_[:, :10] *= 0.0 #two classes for Moons -- would be 10 for MNIST
    x_[range(x.shape[0]), y] = x.max()
    return x_

def insert_moon_label(x, y):
    "method to insert the label for moons dataset"
    x_train_insert = []

    for i in range(x.shape[0]):
        new_data = x[i].tolist()
        new_data.append(y[i].item())
        x_train_insert.append(new_data)
    return torch.tensor(x_train_insert)



In [27]:
# FF Code from Repo
# Network Class that Instantiates our Custom Layers Class & Implements the Train/Predict Funcs
class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        # Append Layers w Correct Dimensions for Weight Matrices -- dims is a list of ints (i.e; list[int])
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda()]

    def predict(self, x):
        goodness_per_label = []
        for label in range(2):
            h = overlay_y_on_x(x, label) #put current label in iteration on training instance
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)] #sum of squared "goodness" -- take max for pred
            # Compute Goodness for Current Label in Range Iters
            goodness_per_label += [sum(goodness).unsqueeze(1)]

        # Get Goodness Over all Labels 
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train(self, x_pos, x_neg):
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train(h_pos, h_neg)


class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.05)
        self.threshold = 2.0
        # self.num_epochs = 1000
        self.num_epochs = 1000 #10x increase in epochs, what do?

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
        return self.relu(
            torch.mm(x_direction, self.weight.T) +
            self.bias.unsqueeze(0))

    def train(self, x_pos, x_neg):
        for i in tqdm(range(self.num_epochs)):
            g_pos = self.forward(x_pos).pow(2).mean(1)
            g_neg = self.forward(x_neg).pow(2).mean(1)
            # The following loss pushes pos (neg) samples to
            # values larger (smaller) than the self.threshold.
            loss = torch.log(1 + torch.exp(torch.cat([
                -g_pos + self.threshold,
                g_neg - self.threshold]))).mean()
            self.opt.zero_grad()
            # this backward just compute the derivative and hence
            # is not considered backpropagation.
            loss.backward()
            self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()


In [28]:
# Main Training Loop -- Moons

# train_loader, test_loader = MNIST_loaders() #loaders load in the entirety of the MNIST Set

# Instantiate Model + Data
net = Net([3, 500, 500])
# x, y = next(iter(train_loader))
x, y = train_loader[0], train_loader[1]
x, y = x.cuda(), y.cuda()

# x_pos = overlay_y_on_x(x, y) #add actual labels to training instances
x_pos = insert_moon_label(x, y) #add actual labels to training instances
x_pos = x_pos.to(device)

# Create Random Label for x_negative
rnd = torch.randperm(x.size(0))
x_neg = insert_moon_label(x, y[rnd])
x_neg = x_neg.to(device)


# Go Forward-forward
net.train(x_pos, x_neg)

# y_eval = torch.zeros(x.shape[0])
y_eval = torch.rand(x.shape[0]) #does including a bit of stochasticity make it better?
x_eval = insert_moon_label(x, y_eval)
x_eval = x_eval.to(device)

# print('train error:', 1.0 - net.predict(x).eq(y).float().mean().item()) #switched to below, need include extra dim
print('train error:', 1.0 - net.predict(x_eval).eq(y).float().mean().item())

# x_te, y_te = next(iter(test_loader))
x_test, y_test = test_loader[0], test_loader[1]
x_test, y_test = x_test.cuda(), y_test.cuda()
x_test = insert_moon_label(x_test, y_test) #add actual labels to training instances
x_test = x_test.to(device)

print('test error:', 1.0 - net.predict(x_test).eq(y_test).float().mean().item())

training layer 0 ...


100%|██████████| 1000/1000 [00:10<00:00, 97.96it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:17<00:00, 55.79it/s]


train error: 0.4980599880218506
test error: 0.5097000002861023
