# Problem 1 

Draw the computational graph for the following function, $f = w_o \cdot \mathrm{relu}(w_r h_0 + w_i x_1)$. Then compute `wr.grad`, `wi.grad`, and `wo.grad` using backpropagation.

In [None]:
import math
class ag:  # AutoGrad
    """Namespace for a minimal scalar reverse-mode automatic differentiation engine.

    ``ag.Scalar`` wraps a float and records how it was computed; the functions
    ``ag.log``, ``ag.exp`` and ``ag.relu`` build new ``Scalar`` nodes whose
    ``_backward`` closures push gradients to their inputs.
    """

    @staticmethod
    def log(input):
        """Return a node holding the natural log of ``input.value``.

        ``math.log`` raises ``ValueError`` for non-positive values.
        """
        output = ag.Scalar(math.log(input.value), inputs=[input], op="log")

        def _backward():
            # d/dx log(x) = 1 / x
            input.grad += output.grad / input.value

        output._backward = _backward
        return output

    @staticmethod
    def exp(input):
        """Return a node holding ``e ** input.value``."""
        output = ag.Scalar(math.exp(input.value), inputs=[input], op="exp")

        def _backward():
            # d/dx exp(x) = exp(x), which is already stored in output.value
            input.grad += output.grad * output.value

        output._backward = _backward
        return output

    @staticmethod
    def relu(input):
        """Return a node holding ``max(0, input.value)``."""
        output = ag.Scalar(max(0, input.value), inputs=[input], op="relu")

        def _backward():
            # Gradient passes through only where the input was strictly positive.
            if input.value > 0:
                input.grad += output.grad

        output._backward = _backward
        return output

    class Scalar:  # Scalars with grads
        """A float value that is a node of a computational graph.

        Attributes:
            value: the node's value, stored as ``float``.
            grad: accumulated gradient, starting at ``0.0``.
            inputs: the nodes this node was computed from (empty for leaves).
            op: short name of the operation that produced the node.
            label: free-form display label.
        """

        def __init__(self, value, op="", _backward=lambda: None, inputs=None, label=""):
            self.value = float(value)
            self.grad = 0.0

            self._backward = _backward
            # A fresh list per instance: a shared ``[]`` default would be the
            # same object on every leaf node.
            self.inputs = [] if inputs is None else inputs

            self.op = op
            self.label = label

        def topological_sort(self):
            """Return every node reachable from ``self``, inputs before consumers.

            Implemented as an iterative depth-first post-order walk so that deep
            graphs do not hit Python's recursion limit. The resulting order is
            the same as the equivalent recursive traversal.
            """
            topo_order = []
            visited = {self}
            # Each stack entry pairs a node with an iterator over the inputs
            # that still have to be explored.
            stack = [(self, iter(self.inputs))]

            while stack:
                node, pending = stack[-1]
                for parent in pending:
                    if parent not in visited:
                        visited.add(parent)
                        stack.append((parent, iter(parent.inputs)))
                        break
                else:
                    # All inputs of ``node`` are already emitted.
                    topo_order.append(node)
                    stack.pop()

            return topo_order

        def backward(self):
            """Backpropagate from this node, treating it as the final output.

            Sets ``self.grad`` to ``1.0`` and then runs each node's ``_backward``
            in reverse topological order. Gradients are accumulated with ``+=``
            and are not reset here, so repeated calls add up on the other nodes.
            """
            self.grad = 1.0

            for node in reversed(self.topological_sort()):
                node._backward()

        def __add__(self, other):
            """Return ``self + other``; a non-``Scalar`` operand becomes a constant node."""
            if not isinstance(other, ag.Scalar):
                other = ag.Scalar(other, label=f"{other}\nconst")

            output = ag.Scalar(self.value + other.value,
                               inputs=[self, other], op="add")

            def _backward():
                # Addition passes the upstream gradient unchanged to both operands.
                self.grad += output.grad
                other.grad += output.grad

            output._backward = _backward
            return output

        def __radd__(self, other):
            """Support ``constant + Scalar``."""
            return self + other

        def __mul__(self, other):
            """Return ``self * other``; a non-``Scalar`` operand becomes a constant node."""
            # Same constant handling as __add__ (previously a bare assert).
            if not isinstance(other, ag.Scalar):
                other = ag.Scalar(other, label=f"{other}\nconst")

            output = ag.Scalar(self.value * other.value, inputs=[self, other], op="mul")

            def _backward():
                # Product rule: each operand receives the other operand's value.
                self.grad += other.value * output.grad
                other.grad += self.value * output.grad

            output._backward = _backward
            return output

        def __rmul__(self, other):
            """Support ``constant * Scalar``."""
            return self * other

        def __pow__(self, exponent):  # exponent is just a python float
            """Return ``self ** exponent`` where ``exponent`` is a plain number."""
            output = ag.Scalar(self.value ** exponent, inputs=[self], op=f"pow({exponent})")

            def _backward():
                # Power rule: d/dx x**n = n * x**(n - 1)
                self.grad += (exponent * self.value ** (exponent - 1)) * output.grad

            output._backward = _backward
            return output

        def __neg__(self):
            """Return ``-self``."""
            output = ag.Scalar(-self.value, inputs=[self], op="neg")

            def _backward():
                self.grad += (-1) * output.grad

            output._backward = _backward
            return output

        def __repr__(self) -> str:
            return f"val:{self.value}, grad:{self.grad}"



In [4]:
# Leaf nodes of the graph; each label records the graph-node name and the variable.
x1 = ag.Scalar(2.0, label="z1\nleaf(x1)")
h0 = ag.Scalar(3.0, label="z2\nleaf(h0)")
wr = ag.Scalar(4.0, label="z3\nleaf(wr)")
wi = ag.Scalar(5.0, label="z4\nleaf(wi)")
wo = ag.Scalar(6.0, label="z5\nleaf(wo)")

# z1..z4 are aliases of the leaves, following the graph's node numbering.
z1, z2, z3, z4 = x1, h0, wr, wi

# Forward pass: z10 = wo * relu(wr*h0 + wi*x1)
z5 = z3 * z2          # wr * h0
z6 = z4 * z1          # wi * x1
z7 = z5 + z6          # pre-activation sum
z8 = ag.relu(z7)      # relu(wr*h0 + wi*x1)
z9 = wo
z10 = z8 * z9

# Backward pass from the output, then report the three weight gradients.
z10.backward()
print(*(weight.grad for weight in (wr, wi, wo)))

18.0 12.0 22.0
