<a href="https://colab.research.google.com/github/davidraamirez/GradientWithoutBackpropagation/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Gradient Without Backpropagation

In [22]:
import torch
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import tqdm
import torch.distributions as distr

In [2]:
%pip install torchmetrics --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/512.4 KB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/512.4 KB[0m [31m7.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.4/512.4 KB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import torchmetrics
import torchvision
from torchvision import transforms as T

Loading and preprocessing the dataset

In [4]:
#Load the dataset
train_data = torchvision.datasets.KMNIST('./data', train=True, download=True)

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz to ./data/KMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/18165135 [00:00<?, ?it/s]

Extracting ./data/KMNIST/raw/train-images-idx3-ubyte.gz to ./data/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz to ./data/KMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29497 [00:00<?, ?it/s]

Extracting ./data/KMNIST/raw/train-labels-idx1-ubyte.gz to ./data/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz to ./data/KMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/3041136 [00:00<?, ?it/s]

Extracting ./data/KMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/KMNIST/raw

Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz
Downloading http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz to ./data/KMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5120 [00:00<?, ?it/s]

Extracting ./data/KMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/KMNIST/raw



In [5]:
# Alternative way to get the first element: image, label = next(iter(train_data))
for image, label in train_data:
  break

In [6]:
# Simple transformation that converts the PIL image to a PyTorch array
T.ToTensor()(image).shape

torch.Size([1, 28, 28])

In [7]:
# T.Compose allows to chain together multiple transformations
train_transforms = T.Compose([
    T.ToTensor()
])

In [8]:
# This loads data with both data conversion.
train_data = torchvision.datasets.KMNIST('./data', train=True, transform=train_transforms)

In [9]:
# Loaders are used to shuffle, batch, and possibly sample the elements of the dataset
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

In [10]:
xb, yb = next(iter(train_loader))
print(xb.shape)
print(yb.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [11]:
# Loading the test data is similar, but (a) we do not apply data augmentation,
# and (b) we do not shuffle when building the mini-batches.
test_data = torchvision.datasets.KMNIST('./data', train=False, transform=T.ToTensor())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

Initialize parameters ????!!!!!

In [12]:
#Initialize parameters

def init():
  theta = torch.randn(X.shape, requires_grad=False)
  v = torch.randn(X.shape, requires_grad=False)
  return theta, v

Define Convolutional Neural Network Class

In [13]:
from torch import nn
from torch.nn import functional as F

In [14]:
class SimpleCNN(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        input_size = 1
        self.conv1 = nn.Conv2d(input_size, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, 3, padding=1)
        self.max_pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64*7*7, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.max_pool(x)
        x = x.reshape((-1, 64*7*7))
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [15]:
# We check if CUDA is available. If you do not see it,
# activate a GPU from Runtime >> Change runtime type and 
# restart the notebook.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [16]:
# We need to move the SimpleCNN model to the device immediately
cnn = SimpleCNN(1).to(device)

In [17]:
# Note: we also need to move data when asking for a prediction
cnn(xb.to(device)).shape

torch.Size([64, 10])

Train and evaluate the network with forward gradient

In [18]:
def accuracy(net, loader, device):
  # A function that aggregates the accuracy over all mini-batches in the loader.
  # See here for a quick-start on torchmetrics: https://torchmetrics.readthedocs.io/en/stable/pages/quickstart.html.
  #acc = torchmetrics.Accuracy().to(device)
  acc = torchmetrics.Accuracy('multiclass', num_classes=10).to(device)
  for xb, yb in loader:
      xb, yb = xb.to(device), yb.to(device)
      ypred = cnn(xb)
      _ = acc(ypred, yb)
  return acc.compute()

In [19]:
# Average accuracy at initialization is 10% (random guessing).
accuracy(cnn, test_loader, device)

tensor(0.1000)

In [20]:
# Note: it is important to move the CNN to the device before initializing the optimizer,
# since the optimizer also has a state that must be moved to the GPU.
loss = nn.CrossEntropyLoss()


In [146]:
def beale_function(x):
  return (torch.pow(torch.tensor([1.5])-x[0]+x[0]*x[1],2) + torch.pow(torch.tensor([2.25])-x[0]+x[0]*torch.pow(x[1],2),2)+torch.pow(torch.tensor([2.625])-x[0]+x[0]*torch.pow(x[1],3),2))

In [179]:
def train_fp(x, y):
  x, y = x.to(device), y.to(device)

  l_rate0 = 0.2
  f = beale_function
  theta=torch.rand(2)
  print('theta', theta)
  t=torch.tensor([0])
  while (torch.norm(theta)>1e-5) :
    l_rate=l_rate0*torch.exp(-t*1e-3)
    t=t+1
    v=torch.normal(torch.tensor([0.0, 0.0]),torch.tensor([1.0, 1.0]))
    print('v', v)
    ft=f(theta)
    print('ft', ft)
    dt=torch.tensor(torch.autograd.functional.jvp(f,theta,v)[1])
    print('dt', dt)
    gt=v*dt
    print('gt', gt)
    print('with lt', l_rate*gt)
    theta=theta - l_rate*gt
    print(t, theta)

  return theta  

In [180]:
x=torch.randn(4)
y=torch.randn(10)
theta=train_fp(x,y)
print(theta)

theta tensor([0.4255, 0.6014])
v tensor([-0.1314, -0.4687])
ft tensor([10.9372])
dt tensor([-1.5295])
gt tensor([0.2009, 0.7169])
with lt tensor([0.0402, 0.1434])
tensor([1]) tensor([0.3854, 0.4580])
v tensor([-1.2037, -1.5741])
ft tensor([10.6352])
dt tensor([4.8736])
gt tensor([-5.8665, -7.6713])
with lt tensor([-1.1721, -1.5327])
tensor([2]) tensor([1.5575, 1.9907])
v tensor([0.4141, 0.0552])
ft tensor([234.7474])
dt tensor([128.0910])
gt tensor([53.0380,  7.0768])
with lt tensor([10.5864,  1.4125])
tensor([3]) tensor([-9.0289,  0.5782])
v tensor([-0.1038,  0.8928])
ft tensor([194.5846])
dt tensor([-396.5368])
gt tensor([  41.1564, -354.0190])
with lt tensor([  8.2066, -70.5917])
tensor([4]) tensor([-17.2356,  71.1699])
v tensor([-1.1287,  0.0427])
ft tensor([3.8611e+13])
dt tensor([5.1961e+12])
gt tensor([-5.8650e+12,  2.2183e+11])
with lt tensor([-1.1683e+12,  4.4190e+10])
tensor([5]) tensor([ 1.1683e+12, -4.4190e+10])
v tensor([2.3120, 1.2013])
ft tensor([inf])
dt tensor([-inf])


  dt=torch.tensor(torch.autograd.functional.jvp(f,theta,v)[1])


In [None]:
for epoch in range(10):
  
  cnn.train()
  for xb, yb in tqdm.tqdm(train_loader):
    
    train(xb, yb)

    

  cnn.eval()
  print(f'Accuracy at epoch {epoch}: {accuracy(cnn, test_loader, device)}')