<a href="https://colab.research.google.com/github/davidraamirez/GradientWithoutBackpropagation/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Gradient Without Backpropagation

In [81]:
import torch
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import tqdm
import torch.distributions as distr

In [82]:
%pip install torchmetrics --quiet

In [83]:
import torchmetrics
import torchvision
from torchvision import transforms as T

Loading and preprocessing the dataset

In [84]:
#Load the dataset
train_data = torchvision.datasets.KMNIST('./data', train=True, download=True)

In [85]:
# Alternative way to get the first element: image, label = next(iter(train_data))
for image, label in train_data:
  break

In [86]:
# Simple transformation that converts the PIL image to a PyTorch array
T.ToTensor()(image).shape

torch.Size([1, 28, 28])

In [87]:
# T.Compose allows to chain together multiple transformations
train_transforms = T.Compose([
    T.ToTensor()
])

In [88]:
# This loads data with both data conversion.
train_data = torchvision.datasets.KMNIST('./data', train=True, transform=train_transforms)

In [89]:
# Loaders are used to shuffle, batch, and possibly sample the elements of the dataset
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

In [90]:
xb, yb = next(iter(train_loader))
print(xb.shape)
print(yb.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [91]:
# Loading the test data is similar, but (a) we do not apply data augmentation,
# and (b) we do not shuffle when building the mini-batches.
test_data = torchvision.datasets.KMNIST('./data', train=False, transform=T.ToTensor())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

Define Convolutional Neural Network Class

In [92]:
from torch import nn
from torch.nn import functional as F

In [102]:
class SimpleCNN(nn.Module):
    def __init__(self, input_size, conv1w, conv1b, conv2w, conv2b, conv3w, conv3b, conv4w, conv4b, fc1w, fc1b, fc2w, fc2b,):
        super().__init__()
        input_size = 1
        self.conv1 = nn.Conv2d(input_size, 8, 3, padding=1)
        self.conv1.weight = torch.nn.Parameter(conv1w)
        self.conv1.bias = torch.nn.Parameter(conv1b)

        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv2.weight = torch.nn.Parameter(conv2w)
        self.conv2.bias = torch.nn.Parameter(conv2b)

        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3.weight = torch.nn.Parameter(conv3w)
        self.conv3.bias = torch.nn.Parameter(conv3b)

        self.conv4 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv4.weight = torch.nn.Parameter(conv4w)
        self.conv4.bias = torch.nn.Parameter(conv4b)

        self.max_pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64*7*7, 1024)
        self.fc1.weight = torch.nn.Parameter(fc1w)
        self.fc1.bias = torch.nn.Parameter(fc1b)

        self.fc2 = nn.Linear(1024, 10)
        self.fc2.weight = torch.nn.Parameter(fc2w)
        self.fc2.bias = torch.nn.Parameter(fc2b)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.max_pool(x)
        x = x.reshape((-1, 64*7*7))
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [103]:
# We check if CUDA is available. If you do not see it,
# activate a GPU from Runtime >> Change runtime type and 
# restart the notebook.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


Initialize the parameters

In [104]:
# We initialize the parameters randomly and the model with an input size
conv1w = torch.randn((8, 1, 3, 3), requires_grad=False)
conv1b = torch.randn(8, requires_grad=False)
conv2w = torch.randn((16, 8, 3, 3), requires_grad=False)
conv2b = torch.randn(16, requires_grad=False)
conv3w = torch.randn((32, 16, 3, 3), requires_grad=False)
conv3b = torch.randn(32, requires_grad=False)
conv4w = torch.randn((64, 32, 3, 3), requires_grad=False)
conv4b = torch.randn(64, requires_grad=False)
fc1w = torch.randn((1024, 3136), requires_grad=False)
fc1b = torch.randn(1024, requires_grad=False)
fc2w = torch.randn((10, 1024), requires_grad=False)
fc2b = torch.randn(10, requires_grad=False)
cnn = SimpleCNN(1, conv1w, conv1b, conv2w, conv2b, conv3w, conv3b, conv4w, conv4b, fc1w, fc1b, fc2w, fc2b).to(device)

In [106]:
# Note: we also need to move data when asking for a prediction
cnn(xb.to(device)).shape

torch.Size([64, 10])

Train and evaluate the network with forward gradient

In [107]:
def accuracy(net, loader, device):
  # A function that aggregates the accuracy over all mini-batches in the loader.
  # See here for a quick-start on torchmetrics: https://torchmetrics.readthedocs.io/en/stable/pages/quickstart.html.
  #acc = torchmetrics.Accuracy().to(device)
  acc = torchmetrics.Accuracy('multiclass', num_classes=10).to(device)
  for xb, yb in loader:
      xb, yb = xb.to(device), yb.to(device)
      ypred = cnn(xb)
      _ = acc(ypred, yb)
  return acc.compute()

In [108]:
# Average accuracy at initialization is 10% (random guessing).
accuracy(cnn, test_loader, device)

tensor(0.0963, device='cuda:0')

DEFINE CROSS_ENTROPY

In [109]:
# Note: it is important to move the CNN to the device before initializing the optimizer,
# since the optimizer also has a state that must be moved to the GPU.
loss = nn.CrossEntropyLoss()


In [110]:
def beale_function(x):
  return (torch.pow(torch.tensor([1.5])-x[0]+x[0]*x[1],2) + torch.pow(torch.tensor([2.25])-x[0]+x[0]*torch.pow(x[1],2),2)+torch.pow(torch.tensor([2.625])-x[0]+x[0]*torch.pow(x[1],3),2))

In [111]:
def rosenbrock_function(x):
  sum=0
  for p in x.size():
    for i in range (x.size(1)-1):
      sum += (100*torch.pow(x[i+1] - torch.pow(x[i], 2), 2) + torch.pow(x[i]-1, 2))
  return sum

In [112]:
def train_fp(x, y):
  x, y = x.to(device), y.to(device)

  l_rate0 = 0.025
  f = rosenbrock_function

  #Parameters
  conv1w = torch.randn((8, 3, 3), requires_grad=False)
  conv1b = torch.randn(8, requires_grad=False)
  conv2w = torch.randn((16, 8, 3, 3), requires_grad=False)
  conv2b = torch.randn(16, requires_grad=False)
  conv3w = torch.randn((32, 16, 3, 3), requires_grad=False)
  conv3b = torch.randn(32, requires_grad=False)
  conv4w = torch.randn((64, 32, 3, 3), requires_grad=False)
  conv4b = torch.randn(64, requires_grad=False)
  fc1w = torch.randn((1024, 3136), requires_grad=False)
  fc1b = torch.randn(1024, requires_grad=False)
  fc2w = torch.randn((10, 1024), requires_grad=False)
  fc2b = torch.randn(10, requires_grad=False) 

  error = 1

  t=torch.tensor([0])

  while (error>1e-10) :

    t=t+1
    v=torch.normal(torch.tensor([0.0, 0.0]),torch.tensor([1.0, 1.0]))
    ft=f(conv1w,conv1b,conv2w,conv2b,conv3w,conv3b,conv4w,conv4b,fc1w,fc1b,fc2w,fc2b)
    dt=torch.tensor(torch.autograd.functional.jvp(f,theta1,v)[1])
    gt=v*dt
    theta0 = theta1
    theta1 = theta1 - l_rate*gt

  return theta1  

In [113]:
x= torch.tensor([2, 4, 8, 16])
y=torch.tensor([1,2,3,4,5,6,7,8,9,10])
theta=train_fp(x,y)















print(theta)

UnboundLocalError: ignored

In [None]:
for epoch in range(3):

  cnn.train()
  for i in range(10):
    xb, yb = next(iter(train_loader))
    xb = xb.to(device)
    yb = yb.to(device)
    ypred = cnn(xb)
    l = loss(ypred, yb)

    #Update cnn parameters
    #Recalculate ypred and loss
    #MIRAR NN_LAB_LOGISITC_REGRESSION
    #CALCULAR G(THETA) QUE ES EL GRADIENTE Y APLICARLO A LOS PARAMETROS DEL CNN, LOS WEIGHTS

  cnn.eval()
  print(f'Accuracy at epoch {epoch}: {accuracy(cnn, test_loader, device)}')