In [15]:
import math

import numpy as np


class Variable:
    """Scalar node of a reverse-mode automatic-differentiation graph.

    Attributes:
        val: the scalar value held by this node.
        grad: gradient accumulated on this node; starts at 0.
        backward_fn: callable that pushes an incoming gradient on to the
            node's inputs, or None for a leaf.

    The arithmetic operators build a new Variable and attach the matching
    ``*BackwardFn`` object.  Operands that are not Variables (plain numbers)
    are wrapped as constant leaves, so ``x + 1`` and ``2 * x`` work as well
    as ``x + y``.
    """

    def __init__(self, val: float):
        self.val = val
        self.grad = 0
        self.backward_fn = None

    @staticmethod
    def _lift(other):
        """Return ``other`` itself if it is a Variable, else wrap it in one."""
        return other if isinstance(other, Variable) else Variable(other)

    def backward(self, grad):
        """Propagate ``grad`` through this node.

        A node with a ``backward_fn`` only forwards the gradient to its
        inputs; a leaf adds it to ``self.grad``.  Repeated calls accumulate,
        so call ``zero_grad`` between independent backward passes.
        """
        if self.backward_fn is not None:
            self.backward_fn(grad)
        else:
            self.grad += grad

    def zero_grad(self):
        """Reset the accumulated gradient to 0."""
        self.grad = 0

    def __add__(self, y):
        y = Variable._lift(y)
        o = Variable(self.val + y.val)
        o.backward_fn = AddBackwardFn(self, y, o.val)
        return o

    def __radd__(self, y):
        # Reached for ``number + Variable``.
        return Variable._lift(y) + self

    def __sub__(self, y):
        y = Variable._lift(y)
        o = Variable(self.val - y.val)
        o.backward_fn = SubBackwardFn(self, y, o.val)
        return o

    def __rsub__(self, y):
        # Operand order matters: this computes ``y - self``.
        return Variable._lift(y) - self

    def __mul__(self, y):
        y = Variable._lift(y)
        o = Variable(self.val * y.val)
        o.backward_fn = MulBackwardFn(self, y, o.val)
        return o

    def __rmul__(self, y):
        return Variable._lift(y) * self

    def __truediv__(self, y):
        y = Variable._lift(y)
        o = Variable(self.val / y.val)
        o.backward_fn = DivBackwardFn(self, y, o.val)
        return o

    def __rtruediv__(self, y):
        # Operand order matters: this computes ``y / self``.
        return Variable._lift(y) / self

# Elementary math functions (exp, log, pow) operating on Variables
def exp(x: Variable):
    """Return e**x as a new Variable whose backward function is ExpBackwardFn."""
    value = math.exp(x.val)
    node = Variable(value)
    # The forward value is handed to the backward function, which reuses it
    # as the derivative of exp.
    node.backward_fn = ExpBackwardFn(x, value)
    return node


def log(x: Variable):
    """Return the natural logarithm of x as a new Variable.

    ``math.log`` raises ValueError when ``x.val`` is not positive.
    """
    value = math.log(x.val)
    node = Variable(value)
    node.backward_fn = LogBackwardFn(x, value)
    return node


def pow(x: Variable, e: float):
    """Return x raised to the constant exponent e as a new Variable.

    Only x is differentiated; e is a plain number.
    Note: this definition shadows the built-in ``pow`` for the rest of the
    notebook.
    """
    value = math.pow(x.val, e)
    node = Variable(value)
    node.backward_fn = PowBackwardFn(x, value, e)
    return node

# Activation Functions
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x) of a Variable.

    Uses ``math`` like the other scalar functions in this file, and a
    two-branch formula so the exponent passed to ``math.exp`` is never
    positive and cannot overflow.

    Args:
        x: input Variable (scalar value).

    Returns:
        A new Variable holding the sigmoid, with SigmoidBackward attached.
    """
    v = x.val
    if v >= 0:
        s = 1 / (1 + math.exp(-v))
    else:
        # Algebraically equal form e**v / (1 + e**v); for very negative v
        # math.exp(v) underflows to 0.0 instead of overflowing.
        e = math.exp(v)
        s = e / (1 + e)
    o = Variable(s)
    # SigmoidBackward receives the output Variable itself (it reads o.val).
    o.backward_fn = SigmoidBackward(x, o)
    return o
def tanh(x):
    """Return the hyperbolic tangent of a Variable.

    The previous formula (1 - e**-x) / (1 + e**-x) evaluates to tanh(x / 2),
    not tanh(x).  This version uses ``math.tanh`` and attaches a backward
    closure with the matching derivative 1 - tanh(x)**2, so forward and
    backward stay consistent within this function.

    Args:
        x: input Variable (scalar value).

    Returns:
        A new Variable holding tanh(x.val).
    """
    o = Variable(math.tanh(x.val))

    def _tanh_backward(grad):
        # d/dx tanh(x) = 1 - tanh(x)**2, expressed through the forward value.
        x.backward(grad * (1 - o.val ** 2))

    o.backward_fn = _tanh_backward
    return o

class AddBackwardFn:
    """Backward step for ``o = x + y``: both operands get the gradient unchanged."""

    def __init__(self, x: Variable, y: Variable, o: float) -> None:
        # o (the forward value) is stored but not needed by the derivative.
        self.x, self.y, self.o = x, y, o

    def __call__(self, grad):
        # d(x + y)/dx = d(x + y)/dy = 1; x is visited before y.
        for operand in (self.x, self.y):
            operand.backward(grad)


class SubBackwardFn:
    """Backward step for ``o = x - y``: x gets ``grad``, y gets ``-grad``."""

    def __init__(self, x: Variable, y: Variable, o: float) -> None:
        # o (the forward value) is stored but not needed by the derivative.
        self.x, self.y, self.o = x, y, o

    def __call__(self, grad):
        minuend, subtrahend = self.x, self.y
        minuend.backward(grad)       # d(x - y)/dx = +1
        subtrahend.backward(-grad)   # d(x - y)/dy = -1


class MulBackwardFn:
    """Backward step for ``o = x * y`` (product rule)."""

    def __init__(self, x: Variable, y: Variable, o: float) -> None:
        # o (the forward value) is stored but not needed by the derivative.
        self.x, self.y, self.o = x, y, o

    def __call__(self, grad):
        # Operand values are read when backward runs, not captured at
        # forward time.
        grad_wrt_x = self.y.val * grad   # d(x * y)/dx = y
        self.x.backward(grad_wrt_x)
        grad_wrt_y = self.x.val * grad   # d(x * y)/dy = x
        self.y.backward(grad_wrt_y)


class DivBackwardFn:
    """Backward step for ``o = x / y`` (quotient rule)."""

    def __init__(self, x: Variable, y: Variable, o: float) -> None:
        # o (the forward value) is stored but not needed by the derivative.
        self.x, self.y, self.o = x, y, o

    def __call__(self, grad):
        grad_wrt_x = grad / self.y.val                          # d(x / y)/dx = 1 / y
        self.x.backward(grad_wrt_x)
        grad_wrt_y = (-self.x.val * grad) / (self.y.val ** 2)   # d(x / y)/dy = -x / y**2
        self.y.backward(grad_wrt_y)


class ExpBackwardFn:
    """Backward step for ``o = exp(x)``."""

    def __init__(self, x: Variable, o: float) -> None:
        self.x, self.o = x, o

    def __call__(self, grad):
        # d exp(x)/dx = exp(x), which is the stored forward value o.
        upstream = grad * self.o
        self.x.backward(upstream)


class LogBackwardFn:
    """Backward step for ``o = log(x)``."""

    def __init__(self, x: Variable, o: float) -> None:
        # o (the forward value) is stored but not needed by the derivative.
        self.x, self.o = x, o

    def __call__(self, grad):
        # d log(x)/dx = 1 / x
        upstream = grad / self.x.val
        self.x.backward(upstream)


class PowBackwardFn:
    """Backward step for ``o = x ** e`` with a constant exponent ``e``.

    Args:
        x: the base Variable.
        o: forward value x ** e (stored; not needed by the derivative).
        e: the constant exponent.
    """

    def __init__(self, x: "Variable", o: float, e: float) -> None:
        self.x = x
        self.o = o
        self.e = e

    def __call__(self, grad):
        # d(x ** e)/dx = e * x ** (e - 1).  Computing it directly, rather
        # than as e * o / x, avoids a spurious ZeroDivisionError at x == 0
        # when the derivative is finite (e.g. x ** 2 at 0 has derivative 0).
        self.x.backward(grad * self.e * self.x.val ** (self.e - 1))

class SigmoidBackward():
    """Backward step for a sigmoid output.

    Unlike the ``*BackwardFn`` classes, ``o`` here is the output Variable
    itself (its ``.val`` is read), not a bare float.
    """

    def __init__(self,x,o):
        self.x, self.o = x, o

    def __call__(self,l):
        # d sigmoid(x)/dx = s * (1 - s), with s the forward output value.
        s = self.o.val
        local_slope = s * (1 - s)
        self.x.backward(local_slope * l)

class TanhBackward():
    """Backward step that sends ``l * (1 - o.val**2) / 2`` to the input.

    ``o`` is the output Variable itself (its ``.val`` is read).  The factor
    1/2 makes this the derivative of o = (1 - e**-x) / (1 + e**-x), which
    equals tanh(x / 2); for o = tanh(x) the derivative would be 1 - o**2
    with no halving.
    """

    def __init__(self,x,o):
        self.x, self.o = x, o

    def __call__(self,l):
        out = self.o.val
        upstream = l * (1 - out ** 2) / 2
        self.x.backward(upstream)

In [16]:
# Leaves of the expression graph.
x, y, z = Variable(4.), Variable(3.), Variable(6.)

# w = (x * y)**2 + z / x
w = pow(x * y, 2) + z / x

# Seed the backward pass with dw/dw = 1.
w.backward(1.)

# Analytic values: dw/dx = 2*x*y**2 - z/x**2, dw/dy = 2*x**2*y, dw/dz = 1/x.
print(x.grad, y.grad, z.grad, sep="\n")

71.625
96.0
0.25


In [41]:
# Linear Layer
class MyLinear():
    """Fully connected layer built from scalar Variables.

    ``W`` is a list of ``in_features`` rows with ``out_features`` Variables
    each, and ``b`` is a list of ``out_features`` Variables; all are
    initialised with ``np.random.randn()``.
    """

    def __init__(self, in_features,out_features):
        self.in_features = in_features
        self.out_features = out_features
        # Weights are drawn row by row, then the biases, so the sequence of
        # random draws matches a plain nested loop.
        self.W = [
            [Variable(np.random.randn()) for _ in range(out_features)]
            for _ in range(in_features)
        ]
        self.b = [Variable(np.random.randn()) for _ in range(out_features)]

    def forward(self, x):
        """Apply the layer to every row of ``x``.

        Each row must be indexable with at least ``in_features`` Variables.
        Returns a list with one list of ``out_features`` Variables per row.
        """
        batch_out = []
        for sample in x:
            activations = []
            for col in range(self.out_features):
                acc = Variable(0)
                for k in range(self.in_features):
                    acc = acc + self.W[k][col] * sample[k]
                activations.append(acc + self.b[col])
            batch_out.append(activations)
        return batch_out

    def parameters(self):
        """Return all weights (row-major) followed by the biases, as one flat list."""
        flat = [weight for row in self.W for weight in row]
        flat.extend(self.b)
        return flat

# Sigmoid Layer
class MySigmoid():
    """Layer that applies ``sigmoid`` to every element; it has no parameters."""

    def __init__(self):
        pass

    def forward(self,x):
        """Return a new list of rows with ``sigmoid`` applied to each entry of ``x``."""
        return [[sigmoid(entry) for entry in row] for row in x]

    def parameters(self):
        """Return an empty list: there is nothing to train."""
        return []

# Sequential Layer
class MySequential():
    """Container that chains layers, feeding each layer's output to the next.

    Args:
        layers: list of objects exposing ``forward(x)`` and ``parameters()``.
            Defaults to a fresh empty list per instance.  A mutable ``[]``
            default would be shared by every instance created without
            arguments.
    """

    def __init__(self,layers=None):
        # A list passed by the caller is kept by reference.
        self.layers = [] if layers is None else layers

    def forward(self,x):
        """Run ``x`` through every layer in order and return the final output."""
        o = x
        for l in self.layers:
            o = l.forward(o)
        return o

    def parameters(self):
        """Return the parameters of all layers, concatenated in layer order."""
        params = []
        for l in self.layers:
            params += l.parameters()
        return params

    def __call__(self,x):
        """Shorthand for ``forward(x)``."""
        return self.forward(x)

In [26]:
# Mean Square Error Loss
class MyMSELoss():
    """Squared-error loss: sum of (y - t)**2 over all entries divided by ``len(y)``.

    The divisor is the number of rows in ``y``, not the number of entries.
    """

    def __init__(self):
        pass

    def __call__(self,y, t):
        """Return the loss as a Variable for predictions ``y`` and targets ``t``.

        Both are lists of rows of Variables; ``zip`` silently stops at the
        shorter of the two at each level.
        """
        row_count = len(y)
        total = Variable(0)
        for predicted_row, target_row in zip(y, t):
            for predicted, target in zip(predicted_row, target_row):
                total = total + pow(predicted - target, 2)
        return total / Variable(row_count)


In [42]:
# Stochastic Gradient Descent Optimizer
class MySGD():
    """Plain gradient-descent optimizer over a list of parameters.

    Args:
        parameters: list of objects with ``val``, ``grad`` and
            ``zero_grad()``.  Defaults to a fresh empty list per instance.
            A mutable ``[]`` default would be shared by every optimizer
            created without arguments.
        lr: learning rate that multiplies each gradient in ``step``.
    """

    def __init__(self,parameters=None,lr=0.1):
        self.lr = lr
        # A list passed by the caller is kept by reference.
        self.parameters = [] if parameters is None else parameters

    def step(self):
        """Update every parameter in place: ``val -= lr * grad``."""
        for p in self.parameters:
            p.val -= self.lr * p.grad

    def zero_grad(self):
        """Reset the gradient of every parameter."""
        for p in self.parameters:
            p.zero_grad()

In [46]:
# Model: 2 inputs -> 3 sigmoid hidden units -> 2 linear outputs.
# The layers draw their initial weights in the order they are listed.
model = MySequential([
    MyLinear(in_features=2, out_features=3),
    MySigmoid(),
    MyLinear(in_features=3, out_features=2),
])

# Loss function.
crit = MyMSELoss()

# Optimizer (SGD) over every parameter of the model.
optim = MySGD(model.parameters(), lr=0.1)

In [47]:
# Dummy data: a single sample with two input features and two target values.
x = [[Variable(v) for v in (1, 1)]]
y = [[Variable(v) for v in (3, 2)]]

In [48]:
for epoch in range(5):
    # Clear gradients left over from the previous iteration; the forward
    # pass below does not read them.
    optim.zero_grad()
    o = model(x)
    l = crit(o,y)
    print("loss = ",l.val)
    # Seed the backward pass with dl/dl = 1, then take one SGD step.
    l.backward(1)
    optim.step()

loss =  5.359910108787881
loss =  0.6672714578224375
loss =  0.08051778575292434
loss =  0.009484404234313596
loss =  0.0011072909441212065
