# Add Solution

In [None]:
class Add(Node):
    """Graph node whose value is the sum of its two input nodes.

    The derivative of a sum with respect to either input is 1, so the
    backward pass forwards the gradient received from each output node
    to both inputs, scaled by 1.
    """

    def __init__(self, x, y):
        """Hand ``x`` and ``y`` to ``Node.__init__`` as the input list."""
        Node.__init__(self, [x, y])

    def forward(self):
        """Set ``self.value`` to the sum of the two input values."""
        lhs = self.input_nodes[0]
        rhs = self.input_nodes[1]
        self.value = lhs.value + rhs.value

    def backward(self):
        """Fill ``self.dvalues`` with the gradient for each input node.

        Every entry is reset to 0 and then accumulated, one output node
        at a time. With no output nodes there is no incoming gradient,
        so each input receives the local derivative (1) directly.
        """
        lhs = self.input_nodes[0]
        rhs = self.input_nodes[1]
        # Reset before accumulating so repeated calls do not stack up.
        self.dvalues = dict.fromkeys(self.input_nodes, 0)

        if len(self.output_nodes) > 0:
            for consumer in self.output_nodes:
                # Gradient that this consumer computed with respect to us.
                upstream = consumer.dvalues[self]
                self.dvalues[lhs] += 1 * upstream
                self.dvalues[rhs] += 1 * upstream
        else:
            self.dvalues[lhs] += 1
            self.dvalues[rhs] += 1

## Forward Pass

We take the value from both input nodes and add them together.

## Backward Pass

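We first initialize or reset the dvalue for each input node to its zero value. For a scalar this is simply 0; for an array it is an array of 0s. The derivative of $+$ with respect to each input is 1. We loop over all the output nodes and accumulate their gradients using the chain rule. Remember that we must consider every path from the output back to the input, which is why the contributions are summed. If the node has no output nodes, there is no incoming gradient to multiply by, so each input simply receives the local derivative, 1.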

The majority of the code for the backward pass will look the same for the rest of the solutions. The differences will be the number of input nodes and the derivatives.

# Mul Solution

In [None]:
class Mul(Node):
    """Graph node whose value is the product of its two input nodes.

    The forward pass keeps both input values in ``self.cache`` because
    the derivative with respect to one input is the value of the other.
    """

    def __init__(self, x, y):
        """Hand ``x`` and ``y`` to ``Node.__init__`` as the input list."""
        Node.__init__(self, [x, y])

    def forward(self):
        """Cache both input values and set ``self.value`` to their product."""
        self.cache[0], self.cache[1] = (
            self.input_nodes[0].value,
            self.input_nodes[1].value,
        )
        lhs_value, rhs_value = self.cache[0], self.cache[1]
        self.value = lhs_value * rhs_value

    def backward(self):
        """Fill ``self.dvalues`` with the gradient for each input node.

        Every entry is reset to 0 and then accumulated, one output node
        at a time. With no output nodes there is no incoming gradient,
        so each input receives its local derivative directly.
        """
        lhs = self.input_nodes[0]
        rhs = self.input_nodes[1]
        # Reset before accumulating so repeated calls do not stack up.
        self.dvalues = dict.fromkeys(self.input_nodes, 0)

        # d(x * y)/dx = y and d(x * y)/dy = x, read from the forward cache.
        grad_wrt_lhs = self.cache[1]
        grad_wrt_rhs = self.cache[0]

        if len(self.output_nodes) > 0:
            for consumer in self.output_nodes:
                # Gradient that this consumer computed with respect to us.
                upstream = consumer.dvalues[self]
                self.dvalues[lhs] += grad_wrt_lhs * upstream
                self.dvalues[rhs] += grad_wrt_rhs * upstream
        else:
            self.dvalues[lhs] += grad_wrt_lhs
            self.dvalues[rhs] += grad_wrt_rhs

## Forward Pass

Similar to the Add node, except this time we multiply the inputs. We also store the input values in the cache for use in the backward pass.

## Backward Pass

Recall the derivatives from the lab:

$$
\frac {\partial Mul}{\partial x} = y
\hspace{0.5in}
\frac {\partial Mul}{\partial y} = x
$$

We use these in the backward pass, multiplying each by the gradient `dval` received from an output node:

```
self.dvalues[self.input_nodes[0]] += self.cache[1] * dval
self.dvalues[self.input_nodes[1]] += self.cache[0] * dval
```