In [1]:
import torch
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.functional import relu


# Data loading and Transformers

## Transformers:
* ToTensor(): This transformation converts images to the torch.Tensor() format.
* Normalize(): This transformation normalizes images with a specified mean and standard deviation.
* Lambda(): This transformation applies an arbitrary function to the images.

## Data Loading:

In this section, the training and test data from the MNIST dataset were loaded. The batch size was also reduced to 10,000 and 1,000 to address the memory issue and DataLoader() was used to create data batches.

In [2]:
torch.manual_seed(0)
transform = torchvision.transforms.Compose([transforms.ToTensor(),
                                            transforms.Normalize((0.1,),(0.1,)),
                                               transforms.Lambda(lambda x: torch.flatten(x))])
train_data = MNIST(root = "./", train= True, transform = transform ,download=True)
test_data = MNIST(root = "./", train= False, transform = transform ,download=False)

train_size = 10000
test_size = 1000
train_loader = DataLoader(dataset=train_data, batch_size=train_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=test_size, shuffle=True)


device = "cuda" if torch.cuda.is_available() else "cpu"


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 80406971.73it/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 32615964.95it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 19500280.26it/s]


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7430003.42it/s]


Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



# Learning Class

The Learning class is a custom neural network layer that implements the forward-forward algorithm. The class has two main methods: forward() and learn().

## Forward Method

The forward() method computes the output of the layer for a given input. The input is first normalized and then passed through a ReLU activation function.




```
def forward(self, x):
  x_norm = torch.norm(x, 2, 1, keepdim=True)
  x_ = x/(x_norm + self.eps)
  return relu((x_ @ self.w.T) + self.b.unsqueeze(dim = 0))
```



The x_norm is x L2 norm and epsilon is a value to avoid division by zero.

## Learn Method

The learn() method trains the layer's weights and biases using the forward-forward algorithm. The method takes two input tensors: x_pos and x_neg, representing positive and negative examples, respectively. The method performs the following steps for each epoch:

*   Compute the goodness for positive and negative examples and Concatenate the positive and negative goodness values and compute the loss using the loss function.
$$ \log(1 + e^{[(threshold - g_{pos})\, ,\, (g_{neg} - threshold)]})$$
*   Compute the mean square error (MSE)

*   Zero the gradients of the layer's parameters and Compute the backward (derivatives) pass using the loss function.

*   Update the layer's parameters using the optimizer.

*   Print the current epoch and loss value and Return the updated outputs for positive and negative examples.



---
# Answer the question
The necessary conditions for goodness for positive and negative data are as follows:

*   For positive data, goodness must be positive.

*   For negative data, goodness must be negative.


The loss function defined above can satisfy these conditions by using the exponantial function. If goodness for positive data is positive, the exp function will map it to a large value whose log is also positive. If goodness for negative data is negative, the exp function will map it to a small value whose log is also negative.

In [3]:
class Learning(nn.Module):
    def __init__(self, in_features, out_features, num_epochs = 10 ,threshold = 2.0, lr =0.01):
        super().__init__()
        self.w = torch.nn.Parameter(torch.randn(out_features, in_features))
        self.b = torch.nn.Parameter(torch.randn(out_features))
        self.lr = lr
        self.optimizer = optim.Adam(self.parameters(), lr = self.lr)
        self.threshold = threshold
        self.num_epochs = num_epochs
        self.eps = 1e-4


    def forward(self, x):
        x_norm = torch.norm(x, 2, 1, keepdim=True)
        x_ = x/(x_norm + self.eps)
        return relu((x_ @ self.w.T) + self.b.unsqueeze(dim = 0))

    def learn(self, x_pos, x_neg):
        for epoch in range(self.num_epochs):
            g_pos = torch.pow(self.forward(x_pos), 2).mean(dim = 1)
            g_neg = torch.pow(self.forward(x_neg),2).mean(dim = 1)

            loss = torch.log(1 + torch.exp(torch.cat([self.threshold - g_pos ,
                                                       g_neg - self.threshold]))).mean()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            print(f"{epoch+1}/{self.num_epochs} --- Loss: {loss.item():.4f}")

        return  (
            self.forward(x_pos).detach(),
              self.forward(x_neg).detach()
              )



# FF class

Learning class defines the forward-forward model for the problem. It first creates the layers according to the **Learning** class described above, based on the number of neurons (hidden_dims) it takes.

The methods are implemented:

* Generate_data( ): This method generates the desired data (positive and negative data) by placing the data in the first ten pixels of the image.
* Predict( ): In this method, the prediction is performed. First, a dataset is created for each label based on the data passing through the layers and calculating the amount of Goodness. Then the amount of goodness for each label is calculated. And finally, the prediction is made on the data based on which label has the higher goodness value.
* learn( ): Learning takes positive and negative data as argument in this method and use the learn method that implemented FFnet class.

In [4]:
class FF(nn.Module):
    def __init__(self, hidden_dims, device = 'cpu', num_epochs = 10, lr = 0.01, threshold = 2.0):
        super().__init__()
        self.layers = nn.ModuleList()
        self.device = device
        self.lr = lr
        for i in range(len(hidden_dims)-1):
            self.layers.append(Learning(hidden_dims[i], hidden_dims[i+1], num_epochs = num_epochs, lr = self.lr, threshold = threshold).to(self.device))
        self.num_class = 10
        print(f"---- Device: {device} ----")

    def Generate_data(self, x, y):
        x_s = x.clone()
        x_s[range(x.shape[0]),:self.num_class] = 0
        x_s[range(x.shape[0]),y] = x.max()
        return x_s.to(self.device)

    def predict(self, x):
        label_goodness = []
        for label in range(self.num_class):
            a = self.Generate_data(x, label)
            goodness = []
            for layer in self.layers:
              a = layer(a)
              goodness.append(torch.pow(a , 2).mean(dim = 1))
            label_goodness.append(sum(goodness).unsqueeze(dim = 1))
        label_goodness = torch.cat(label_goodness, dim = 1)
        return torch.argmax(label_goodness, dim = 1)

    def learn(self, x_pos, x_neg):
        for inx in range(len(self.layers)):
            print()
            print(f" Layer {inx +1 } : ")
            x_pos, x_neg = self.layers[inx].learn(x_pos, x_neg)


# Main


*   Set Hyperparameters
*   Learn model
*   evaluate model



In [5]:
num_epochs = 350
lr = 0.15
threshold = 2
hidden_dims = [784, 1000, 500, 200]
model = FF(hidden_dims ,device=device,num_epochs=num_epochs, lr = lr, threshold=threshold)
x_train, labels_train = next(iter(train_loader))
x_train = x_train.to(device)
labels_train = labels_train.to(device)
pos_data = model.Generate_data(x_train, labels_train)
inx = torch.randint(low=0,high=x_train.shape[0],size = (x_train.shape[0],))
neg_data  = model.Generate_data(x_train,labels_train[inx])
model.learn(pos_data,neg_data)

print("--------------------------------------------------")
print()
train_accuracy = 100*(model.predict(x_train) == (labels_train)).sum()/x_train.shape[0]
print(f"train accuracy: {train_accuracy.item():.2f} %")

x_ts, y_ts = next(iter(test_loader))
x_ts = x_ts.to(device)
y_ts = y_ts.to(device)

test_accuracy = 100*(model.predict(x_ts) == (y_ts)).sum()/x_ts.shape[0]
print(f"test accuracy: {test_accuracy.item():.2f} %")


---- Device: cuda ----

 Layer 1 : 
1/350 --- Loss: 0.8168
2/350 --- Loss: 1.9658
3/350 --- Loss: 0.6979
4/350 --- Loss: 0.8691
5/350 --- Loss: 0.9771
6/350 --- Loss: 1.0041
7/350 --- Loss: 0.9937
8/350 --- Loss: 0.9549
9/350 --- Loss: 0.8870
10/350 --- Loss: 0.7951
11/350 --- Loss: 0.7127
12/350 --- Loss: 0.7125
13/350 --- Loss: 0.7989
14/350 --- Loss: 0.8333
15/350 --- Loss: 0.7802
16/350 --- Loss: 0.7151
17/350 --- Loss: 0.6949
18/350 --- Loss: 0.7128
19/350 --- Loss: 0.7382
20/350 --- Loss: 0.7527
21/350 --- Loss: 0.7517
22/350 --- Loss: 0.7374
23/350 --- Loss: 0.7161
24/350 --- Loss: 0.6974
25/350 --- Loss: 0.6911
26/350 --- Loss: 0.7004
27/350 --- Loss: 0.7147
28/350 --- Loss: 0.7192
29/350 --- Loss: 0.7100
30/350 --- Loss: 0.6962
31/350 --- Loss: 0.6885
32/350 --- Loss: 0.6897
33/350 --- Loss: 0.6954
34/350 --- Loss: 0.7000
35/350 --- Loss: 0.7003
36/350 --- Loss: 0.6962
37/350 --- Loss: 0.6900
38/350 --- Loss: 0.6852
39/350 --- Loss: 0.6843
40/350 --- Loss: 0.6869
41/350 --- Lo

### Inspired by ----> YouTube Channel: **Data Scholar**