In [1]:
import numpy as np
import torch

import autograd
from autograd import backend

In [2]:
# Truth table of a two-input OR gate: every 0/1 input pair and its label.
xs = [[a, b] for a in (0, 1) for b in (0, 1)]
ys = [int(a or b) for a, b in xs]

# The same dataset as PyTorch tensors.
txs, tys = torch.tensor(xs), torch.tensor(ys)

In [3]:
class AutogradManualModel:
    """Fully connected network built neuron-by-neuron from scalar `autograd.Variable`s.

    Every weight is an individual `autograd.Variable`; each layer shares a single
    scalar bias. Calling the model on one input vector builds the computation
    graph for that input and returns the output node.

    Args:
        nin: number of input features.
        layer_dims: number of neurons in each layer, in order (any sequence).
        activation: 'relu' or 'sigmoid'; applied after every layer except the last.

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """

    def __init__(self, nin, layer_dims=[3,4,1], activation='relu'):
        # Copy so that tuples are accepted and the shared default list is never touched.
        dims = list(layer_dims)
        self.nin = nin
        self.layer_dims = [nin] + dims

        # self.w[layer][neuron][input] -> one Variable per connection.
        self.w = []
        for layer_idx in range(1, len(self.layer_dims)):
            fan_in = self.layer_dims[layer_idx - 1]
            fan_out = self.layer_dims[layer_idx]
            self.w.append([
                [autograd.Variable(np.random.uniform(-0.1, 0.1)) for _ in range(fan_in)]
                for _ in range(fan_out)
            ])

        # One scalar bias per layer, shared by every neuron of that layer.
        self.b = [autograd.Variable(np.random.uniform(-0.1, 0.1)) for _ in range(len(dims))]

        if activation == 'relu':
            self.activation = autograd.relu
        elif activation == 'sigmoid':
            self.activation = autograd.sigmoid
        else:
            raise ValueError(f'available activations (relu, sigmoid). Recieved: {activation}')

    @property
    def weights(self):
        """All weight variables as returned by `backend.flatten(self.w)`."""
        return backend.flatten(self.w)

    @property
    def biases(self):
        """All bias variables as returned by `backend.flatten(self.b)`."""
        return backend.flatten(self.b)

    def __call__(self, x):
        """Build the forward graph for a single input vector `x`.

        Returns the output of the LAST neuron of the final layer, so the model
        is meant to be used with a single-unit output layer.
        """
        inputs = x
        last_layer = len(self.w) - 1

        for layer_idx, (layer_weights, layer_bias) in enumerate(zip(self.w, self.b)):
            layer_outputs = []
            for neuron_weights in layer_weights:
                out = sum([w * inp for w, inp in zip(neuron_weights, inputs)]) + layer_bias
                # The final layer stays linear: no activation is applied to it.
                if layer_idx != last_layer:
                    out = self.activation(out)
                layer_outputs.append(out)
            # This layer's outputs feed the next layer.
            inputs = layer_outputs

        return out

In [4]:
class TorchManualModel:
    """PyTorch twin of the hand-built network: one scalar leaf tensor per weight.

    Weights are stored as `self.w[layer][neuron][input]` and every layer has a
    single scalar bias in `self.b`. `activation` ('relu' or 'sigmoid') is used
    after every layer except the last one.
    """

    def __init__(self, nin, layer_dims=[3,4,1], activation='relu'):
        def new_param():
            # Scalar leaf tensor drawn uniformly from [-0.1, 0.1).
            return torch.tensor(np.random.uniform(-0.1, 0.1), requires_grad=True)

        self.nin = nin
        self.layer_dims = [nin] + layer_dims

        sizes = self.layer_dims
        self.w = [
            [[new_param() for _ in range(fan_in)] for _ in range(fan_out)]
            for fan_in, fan_out in zip(sizes[:-1], sizes[1:])
        ]
        self.b = [new_param() for _ in range(len(layer_dims))]

        for known_name, fn in (('relu', torch.relu), ('sigmoid', torch.sigmoid)):
            if activation == known_name:
                self.activation = fn
                break
        else:
            raise ValueError(f'available activations (relu, sigmoid). Recieved: {activation}')

    @property
    def weights(self):
        """Weight tensors as returned by `backend.flatten(self.w)`."""
        return backend.flatten(self.w)

    @property
    def biases(self):
        """Bias tensors as returned by `backend.flatten(self.b)`."""
        return backend.flatten(self.b)

    def __call__(self, x):
        """Forward one input vector; returns the last neuron of the final layer."""
        final_idx = len(self.w) - 1
        layer_input = x
        for idx, (neurons, bias) in enumerate(zip(self.w, self.b)):
            layer_output = []
            for neuron in neurons:
                out = sum([w * inp for w, inp in zip(neuron, layer_input)]) + bias
                # the output layer is left without an activation
                if idx != final_idx:
                    out = self.activation(out)
                layer_output.append(out)
            layer_input = layer_output
        return out

In [5]:
# Two-input networks with the same layout (5 -> 5 -> 1) in both libraries.
aml = AutogradManualModel(nin=2, layer_dims=[5, 5, 1])
tml = TorchManualModel(nin=2, layer_dims=[5, 5, 1])

In [6]:
# Give the PyTorch model exactly the same starting parameters as the
# autograd model so their losses can be compared epoch by epoch.
weight_pairs = zip(backend.flatten(tml.w), backend.flatten(aml.w))
bias_pairs = zip(tml.b, aml.b)

for torch_param, autograd_param in [*weight_pairs, *bias_pairs]:
    torch_param.data = torch.tensor(autograd_param.data)
    torch_param.requires_grad = True

In [7]:
def train(autograd_model, torch_model, epochs=20, lr=0.01):
    """Train both models side by side with plain gradient descent on the MSE loss.

    Every epoch runs one full pass over the module-level `xs` / `ys` dataset,
    first through the autograd model and then through the PyTorch model, and
    prints both loss values on the same line so they can be compared.

    Args:
        autograd_model: callable model exposing `w`, `b`, `weights` and `biases`
            (e.g. `AutogradManualModel`).
        torch_model: callable model exposing `w` and `b` made of leaf tensors
            (e.g. `TorchManualModel`).
        epochs: number of full passes over the dataset.
        lr: gradient-descent step size.
    """
    for i in range(1, epochs+1):
        # ---------------- autograd ----------------
        outs = [autograd_model(x) for x in xs]

        # for clarity and auto-completion
        loss: autograd.Node = None
        for o, y in zip(outs, ys):
            if loss is None:
                loss = (y - o)**2
            else:
                loss += (y - o)**2

        loss = loss / len(ys)

        loss_val = loss.forward()
        print(f'Epoch {i}: autograd_loss_val={loss_val: .4f}', end='  ')

        loss.backward(autograd_model.w)
        loss.backward(autograd_model.b)

        # Here we do not need to implement something like `zero_grad()`
        # because in autograd the gradients are set to `zero` every `backward` call.
        for w in autograd_model.weights:
            w._data += -lr * w.gradients

            # Reset the graph attached to the weights because it is re-built on
            # every call. This could be avoided by replacing the input layer with
            # a placeholder and processing the inputs batch by batch; otherwise,
            # when inputs are sent independently, graph values from past inputs
            # are overwritten by the newly passed inputs.
            w.outcoming_nodes = []

        for b in autograd_model.biases:
            b._data += -lr * b.gradients
            b.outcoming_nodes = []

        # instead of doing w.outcoming_nodes = [] and b.outcoming_nodes = []
        # you can call these functions:
        # backend.reset_weights_graph(autograd_model.w)
        # backend.reset_weights_graph(autograd_model.b)

        # ---------------- PyTorch ----------------
        # Use the model that was passed in, not a notebook-level global.
        torch_outs = [torch_model(x) for x in xs]

        # for clarity and auto-completion
        torch_loss: torch.Tensor = None
        for o, y in zip(torch_outs, ys):
            if torch_loss is None:
                torch_loss = (y - o)**2
            else:
                torch_loss += (y - o)**2

        torch_loss = torch_loss / len(ys)

        # The graph is rebuilt from scratch every epoch, so it does not need to
        # be retained after this backward pass.
        torch_loss.backward()
        print(f'torch_loss_val={torch_loss.item(): .4f}')

        weights = backend.flatten(torch_model.w)
        biases = backend.flatten(torch_model.b)

        for w in weights:
            w.data += -lr * w.grad
            # equivalent to `model.zero_grad(set_to_none=True)` or `optimizer.zero_grad(set_to_none=True)`
            w.grad = None

        for b in biases:
            b.data += -lr * b.grad
            # equivalent to `model.zero_grad(set_to_none=True)` or `optimizer.zero_grad(set_to_none=True)`
            b.grad = None

In [8]:
# Run the side-by-side comparison with the default number of epochs.
train(autograd_model=aml, torch_model=tml)

Epoch 1: autograd_loss_val= 0.8765  torch_loss_val= 0.8765
Epoch 2: autograd_loss_val= 0.8490  torch_loss_val= 0.8490
Epoch 3: autograd_loss_val= 0.8226  torch_loss_val= 0.8226
Epoch 4: autograd_loss_val= 0.7972  torch_loss_val= 0.7972
Epoch 5: autograd_loss_val= 0.7728  torch_loss_val= 0.7728
Epoch 6: autograd_loss_val= 0.7494  torch_loss_val= 0.7494
Epoch 7: autograd_loss_val= 0.7269  torch_loss_val= 0.7269
Epoch 8: autograd_loss_val= 0.7053  torch_loss_val= 0.7053
Epoch 9: autograd_loss_val= 0.6846  torch_loss_val= 0.6846
Epoch 10: autograd_loss_val= 0.6646  torch_loss_val= 0.6646
Epoch 11: autograd_loss_val= 0.6455  torch_loss_val= 0.6455
Epoch 12: autograd_loss_val= 0.6271  torch_loss_val= 0.6271
Epoch 13: autograd_loss_val= 0.6094  torch_loss_val= 0.6094
Epoch 14: autograd_loss_val= 0.5925  torch_loss_val= 0.5925
Epoch 15: autograd_loss_val= 0.5762  torch_loss_val= 0.5762
Epoch 16: autograd_loss_val= 0.5605  torch_loss_val= 0.5605
Epoch 17: autograd_loss_val= 0.5455  torch_loss_v

In [1]:
import numpy as np
import torch

import autograd
from autograd import backend

In [12]:
# The same scalar input (-2.0) in both libraries: an autograd Variable and a
# PyTorch leaf tensor with gradient tracking enabled.
x = autograd.Variable(-2.)
y = torch.tensor(-2., requires_grad=True)

In [13]:
# PyTorch reference: ReLU applied to the leaf tensor `y`.
z = torch.relu(y)

In [14]:
# NOTE: despite the name `sig`, this is the autograd ReLU node built from `x`.
sig = autograd.relu(x)

In [15]:
sig.forward()

0.0

In [16]:
z

tensor(0., grad_fn=<ReluBackward0>)

In [17]:
sig.backward(x)

In [18]:
print(f'{x.gradients: .4f}')

 0.0000


In [19]:
z.backward()

In [20]:
y.grad

tensor(0.)

In [1]:
import autograd
# Leaf variables of the example expression.
x = autograd.Variable(5.)
y = autograd.Variable(6.)
z = autograd.Variable(7.)

# Build the graph one named operation at a time. Overall:
#   a8 = sin(((a3 * a2 + z) / y) * x)   with a2 = (x + y) * x and a3 = a2 * y
# `x` feeds first_add, first_mul and fourth_mul; `a2` is used by both
# second_mul and third_mul, so the graph has reconvergent paths.
a1 = autograd.add(x, y, name="first_add")          # x + y
a2 = autograd.multiply(a1, x, name='first_mul')    # a1 * x
a3 = autograd.multiply(a2, y, name='second_mul')   # a2 * y
a4 = autograd.multiply(a3, a2, name='third_mul')   # a3 * a2
a5 = autograd.add(a4, z, name='second_add')        # a4 + z
a6 = autograd.divide(a5, y, name='first_div')      # a5 / y
a7 = autograd.multiply(a6, x, name='fourth_mul')   # a6 * x
a8 = autograd.sin(a7, name='sin_op')               # sin(a7)

In [2]:
# Edges found so far, in discovery order, and the set used to de-duplicate them.
l = []
visited = set()
def fun(r):
    """Collect, depth-first, every edge reachable from node `r`.

    Each edge is appended once to the module-level list `l` as a
    `(child, parent)` tuple; `visited` remembers the edges already recorded.
    A child is explored right after its edge is recorded.

    Args:
        r: a graph node exposing an `outcoming_nodes` iterable of child nodes.
    """
    for child in r.outcoming_nodes:
        edge = (child, r)
        if edge in visited:
            # Already recorded on an earlier visit to `r`, which explored
            # `child` as well. Descending again would only repeat work
            # (exponentially so when paths reconverge) and would never
            # terminate on a cyclic graph.
            continue
        l.append(edge)
        visited.add(edge)
        fun(child)

In [3]:
# Edges found so far, in discovery order, and the set used to de-duplicate them.
l = []
visited = set()
def fun(r):
    """Collect every edge reachable from node `r`, one node's fan-out at a time.

    All not-yet-seen outgoing edges of `r` are appended to the module-level
    list `l` as `(child, parent)` tuples first; only afterwards are the
    children explored, in the same order. `visited` de-duplicates edges.

    Args:
        r: a graph node exposing an `outcoming_nodes` iterable of child nodes.
    """
    newly_reached = []
    for child in r.outcoming_nodes:
        edge = (child, r)
        if edge not in visited:
            l.append(edge)
            visited.add(edge)
            newly_reached.append(child)

    # Descend only through edges recorded during this visit. If nothing was
    # new, `r` was handled before and its children with it, so walking them
    # again would just repeat work (exponentially on reconvergent graphs) and
    # would loop forever on a cycle.
    for child in newly_reached:
        fun(child)

In [4]:
fun(x)

In [5]:
l

[(first_add, <Variable1>),
 (first_mul, <Variable1>),
 (fourth_mul, <Variable1>),
 (first_mul, first_add),
 (second_mul, first_mul),
 (third_mul, first_mul),
 (third_mul, second_mul),
 (second_add, third_mul),
 (first_div, second_add),
 (fourth_mul, first_div),
 (sin_op, fourth_mul)]

In [39]:
order

{(first_add, <Variable1>): 1,
 (first_mul, first_add): 1,
 (second_mul, first_mul): 2,
 (third_mul, second_mul): 2,
 (second_add, third_mul): 4,
 (first_div, second_add): 4,
 (fourth_mul, first_div): 4,
 (sin_op, fourth_mul): 5,
 (third_mul, first_mul): 2,
 (first_mul, <Variable1>): 1,
 (fourth_mul, <Variable1>): 1}

In [19]:
z.backward()

AttributeError: 'Variable' object has no attribute 'backward'

In [10]:
sig.backward(x)

<SigmoidOperation1>
[]
<ExpOperation3>
[]
<MultiplyOperation2>
[]
<MultiplyOperation2>
[]
<SigmoidOperation1>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<AddOperation4>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<AddOperation4>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<ExpOperation3>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<MultiplyOperation2>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<MultiplyOperation2>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyOperation2>), (<SigmoidOperation1>, <ExpOperation3>)]
<SigmoidOperation1>
[(<MultiplyOperation2>, <Variable1>), (<ExpOperation3>, <MultiplyO

In [9]:
sig._nodes

[<ExpOperation3>, <AddOperation4>, <DivideOperation5>]

In [15]:
# Debugging hack: prepend the third-from-last entry of the private `sig._nodes`
# list to `sig.incoming_nodes`, mutating the node's state in place.
# NOTE(review): re-running this cell inserts the entry again — confirm this is
# only meant for one-off inspection.
sig.incoming_nodes.insert(0, sig._nodes[-3])

In [16]:
sig.get_incoming_nodes()

[<ExpOperation3>, <ExpOperation3>, <Variable1>]

In [10]:
x.gradients

0.10499358540350662

In [13]:
y.grad

tensor(0.1050)

In [11]:
x.incoming_nodes

AttributeError: 'Variable' object has no attribute 'incoming_nodes'