<a href="https://colab.research.google.com/github/davidraamirez/GradientWithoutBackpropagation/blob/main/LogisticRegression_fwd_gradient.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import torch
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import tqdm
import torch.distributions as distr

In [31]:
%pip install torchmetrics --quiet

In [32]:
import torchmetrics
import torchvision
from torchvision import transforms as T

Loading and preprocessing the data

In [33]:
#Load the dataset
penguins = tfds.load('penguins', as_supervised=True, split='train')

In [34]:
# By default, the Dataset object is an iterator over the elements.
# The instructions below extract the underlying tensors.
X, y = penguins.batch(500).get_single_element()
X, y = X.numpy(), y.numpy()

In [35]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, stratify=y)

In [36]:
Xtrain = torch.from_numpy(Xtrain).float()
Xtest = torch.from_numpy(Xtest).float()

In [37]:
ytrain = torch.from_numpy(ytrain).long()
ytest = torch.from_numpy(ytest).long()

Define Logistic Regression

In [38]:
from torch import nn
from torch.nn import functional as F

In [39]:
class SimpleLogisticRegression(nn.Module):
  def __init__(self, input_size, w, b):
    super().__init__()
    self.weight = nn.Parameter(w)
    self.bias = nn.Parameter(b)


  def forward(self, x):
    x = x.reshape(1, -1)
    return torch.softmax(x@self.weight + self.bias, 1)

In [40]:
# We check if CUDA is available. If you do not see it,
# activate a GPU from Runtime >> Change runtime type and 
# restart the notebook.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


Initialize the parameters

In [41]:
# We initialize the parameters randomly and the model with an input size
w = torch.randn((4, 3), requires_grad=False)
b = torch.randn((3, ), requires_grad=False)
LG = SimpleLogisticRegression(4, w, b).to(device)

In [42]:
# Note: we also need to move data when asking for a prediction
print(LG(Xtrain[0].to(device)))

tensor([[0.1518, 0.0547, 0.7934]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


Train and evaluate the network with forward gradient

In [43]:
def accuracy(ytrue, ypred):
  return (ypred.argmax(1) == ytrue).float().mean()

In [44]:
# Average accuracy at initialization is 10% (random guessing).
accuracy(ytrain[0].to(device), LG(Xtrain[0].to(device)))

tensor(1., device='cuda:0')

Define Cross Entropy

In [45]:
def cross_entropy(ytrue, ypred):
  """ Cross-entropy loss.
  Inputs:
  - ytrue (n,): vector of indices for the correct class.
  - ypred (n, 3): predictions of the model.
  Returns the average cross-entropy.
  """
  # This is called integer array indexing in NumPy:
  # https://numpy.org/doc/stable/user/basics.indexing.html#integer-array-indexing
  return - ypred[torch.arange(0, ypred.shape[0]), ytrue].log().mean()

In [46]:
def beale_function(x):
  return (torch.pow(torch.tensor([1.5])-x[0]+x[0]*x[1],2) + torch.pow(torch.tensor([2.25])-x[0]+x[0]*torch.pow(x[1],2),2)+torch.pow(torch.tensor([2.625])-x[0]+x[0]*torch.pow(x[1],3),2))

In [47]:
def rosenbrock_function(x):
  sum=0
  for i in range (x.size(0) -1):
    sum += (100*torch.pow(x[i+1] - torch.pow(x[i], 2), 2) + torch.pow(1-x[i], 2))
  return sum

In [48]:
def sphere_function(x):
  sum=0
  for i in range(x.size(0)):
    sum += torch.pow(x[i], 2)
  return sum

In [49]:
from functorch import jvp

In [50]:
def train_fwd_gradient(x, y):
  x, y = x.to(device), y.to(device)

  l_rate0 = 0.0001
  f = sphere_function

  #Parameters
  w = torch.FloatTensor(4, 3)
  #w = torch.div(w, torch.norm(w, 2))

  b = torch.FloatTensor(3, )
  #b = torch.div(b, torch.norm(b, 2))

  LG = SimpleLogisticRegression(4, w, b).to(device)
  w1 = w.reshape(-1)
  #w1 = torch.cat((w1, b), 0)
  error=0
  for i in range (x.size(0)):
    if (LG(x[i]).argmax(1)- y[i])!=0:
        error = error+ 1
  error= error / x.size(0)
  print(error)
  t=torch.tensor([0])

  while (error>0.1) :

    t=t+1
    vw1=torch.randn(w1.shape)
    vw1 = (vw1 - torch.mean(vw1))/torch.std(vw1)
    vb=torch.randn(b.shape)
    vb=(vb - torch.mean(vb))/torch.std(vb)

    ftw1, dtw1 = torch.tensor(jvp(f,(w1, ), (vw1, )))
    print(ftw1)
    ftb, dtb = torch.tensor(jvp(f,(b, ), (vb, )))
    gtw1=vw1*dtw1
    gtb = vb*dtb
    w1 = w1 - l_rate0*gtw1
    b = b - l_rate0*gtb


    w = w1.reshape(-1, 3)
    LG = SimpleLogisticRegression(4, w, b).to(device)
    error=0
    for i in range (x.size(0)):
      if (LG(x[i]).argmax(1)- y[i])!=0:
        error = error+ 1
    error= error / x.size(0)
    print (error)
  return w, b

In [51]:
w, b = train_fwd_gradient(Xtrain, ytrain)

0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.796
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tensor(inf)
0.8
tens

KeyboardInterrupt: ignored

In [None]:
for epoch in range(1):

  LG.train()
  for i in range(1):
    xb, yb = next(iter(train_loader))
    xb = xb.to(device)
    yb = yb.to(device)

    w, b = train_fwd_gradient(xb, yb)
    LG = SimpleLogisticRegression(1, w, b)