In [1]:
import numpy as np
import math

In [2]:
class Activation(object):

    """
    Interface for activation functions (non-linearities).

    Subclasses implement ``forward`` and ``derivative``. In all
    implementations, the ``state`` attribute must contain the result,
    i.e. the output of forward, so that ``derivative`` can be evaluated
    without access to the original input.

    Activation functions are element-wise operations, i.e. they must not
    change the shape of the input.
    """

    def __init__(self):
        # Output of the most recent forward pass; None until forward runs.
        self.state = None

    def __call__(self, x):
        """
        Make the instance callable; equivalent to ``forward(x)``.
        """
        return self.forward(x)

    def forward(self, x):
        """
        Apply the activation to ``x`` and store the result in ``state``.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead.
        raise NotImplementedError("forward must be implemented by subclasses")

    def derivative(self):
        """
        Return the derivative of the activation at the last forward input.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError("derivative must be implemented by subclasses")

In [3]:
class Identity(Activation):

    """
    Pass-through activation: the output equals the input.

    Serves as the reference example of the Activation interface.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Store ``x`` unchanged in ``state`` and return it.
        """
        self.state = x
        return self.state

    def derivative(self):
        """
        Return the constant slope of the identity function.
        """
        slope = 1.0
        return slope

In [4]:
class Sigmoid(Activation):

    """
    Sigmoid non-linearity: 1 / (1 + exp(-x)), applied element-wise.
    """

    # Remember do not change the function signatures as those are needed
    # to stay the same for AutoLab.

    def __init__(self):
        super(Sigmoid, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: sigmoid of x, same shape as x (also stored in ``state``)
        """
        # np.exp works on scalars and on whole batches; math.exp accepts
        # scalars only and raises on array input.
        self.state = 1.0 / (1.0 + np.exp(np.negative(x)))
        return self.state

    def derivative(self):
        """
        Return:
            out: s * (1 - s), where s is the output of the last forward call
        """
        return self.state * (1.0 - self.state)

# Sanity check at x = 1: print the sigmoid output and then its derivative.
sigmoid=Sigmoid()
print(sigmoid.forward(1))
print(sigmoid.derivative())

0.7310585786300049
0.19661193324148185


In [11]:
class Tanh(Activation):

    """
    Tanh non-linearity, applied element-wise.
    """

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: tanh of x, same shape as x (also stored in ``state``)
        """
        self.state = np.tanh(x)
        return self.state

    def derivative(self):
        """
        Return:
            out: 1 - t**2, where t is the output of the last forward call
        """
        return 1 - (self.state * self.state)
# Sanity check at x = 1: print the tanh output and then its derivative.
tanh=Tanh()
print(tanh.forward(1))
print(tanh.derivative())

0.7615941559557649
0.41997434161402614


In [12]:
class ReLU(Activation):

    """
    ReLU non-linearity: max(0, x), applied element-wise.
    """

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: element-wise max(0, x), same shape as x (stored in ``state``)
        """
        # The builtin max() compares whole objects and raises on arrays
        # ("truth value ... is ambiguous"); np.maximum is element-wise.
        self.state = np.maximum(0, x)
        return self.state

    def derivative(self):
        """
        Return:
            out: 1.0 where the last forward output is positive, 0.0 elsewhere
                 (the slope at exactly 0 is taken to be 0.0)
        """
        return np.greater(self.state, 0).astype(float)

# Sanity check at x = 1: print the ReLU output and then its derivative.
relu=ReLU()
print(relu.forward(1))
print(relu.derivative())

1
1.0


In [None]:
class Linear():
    """
    Skeleton of a fully connected layer.

    The constructor sets up parameters and zeroed gradient/momentum buffers;
    ``forward`` and ``backward`` are placeholders that raise
    NotImplementedError.
    """

    def __init__(self, in_feature, out_feature, weight_init_fn, bias_init_fn):

        """
        Argument:
            W (np.array): (in feature, out feature)
            dW (np.array): (in feature, out feature)
            momentum_W (np.array): (in feature, out feature)

            b (np.array): (1, out feature)
            db (np.array): (1, out feature)
            momentum_B (np.array): (1, out feature)
        """

        self.W = weight_init_fn(in_feature, out_feature)
        self.b = bias_init_fn(out_feature)

        # Buffers are allocated with the shapes documented above so that
        # in-place operations such as ``fill`` and ``-=`` work on them.
        self.dW = np.zeros((in_feature, out_feature))
        self.db = np.zeros((1, out_feature))

        self.momentum_W = np.zeros((in_feature, out_feature))
        self.momentum_b = np.zeros((1, out_feature))

    def __call__(self, x):
        """
        Make the layer callable; equivalent to ``forward(x)``.
        """
        return self.forward(x)

    def forward(self, x):
        """
        Argument:
            x (np.array): (batch size, in feature)
        Return:
            out (np.array): (batch size, out feature)
        Raises:
            NotImplementedError: always; this skeleton has no forward pass.
        """
        # ``NotImplemented`` is not an exception; raising it gives a TypeError.
        raise NotImplementedError("Linear.forward is not implemented")

    def backward(self, delta):

        """
        Argument:
            delta (np.array): (batch size, out feature)
        Return:
            out (np.array): (batch size, in feature)
        Raises:
            NotImplementedError: always; this skeleton has no backward pass.
        """
        raise NotImplementedError("Linear.backward is not implemented")

In [7]:
# The following Criterion class is the basis for a number of loss functions.
# Losses are written as classes so that they can be exchanged easily, which is
# the same approach PyTorch and other ML libraries take.

class Criterion(object):
    """
    Interface for loss functions.

    Subclasses implement ``forward(x, y)`` to compute the loss and
    ``derivative()`` to return its gradient with respect to ``x``.
    """

    # Nothing needs done to this class, it's used by the following Criterion classes

    def __init__(self):
        # Inputs and result of the most recent forward call.
        self.logits = None
        self.labels = None
        self.loss = None

    def __call__(self, x, y):
        """
        Make the criterion callable; equivalent to ``forward(x, y)``.
        """
        return self.forward(x, y)

    def forward(self, x, y):
        """
        Compute the loss for predictions ``x`` against targets ``y``.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        # ``NotImplemented`` is not an exception; raising it gives a TypeError.
        raise NotImplementedError("forward must be implemented by subclasses")

    def derivative(self):
        """
        Return the gradient of the loss with respect to the forward input.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError("derivative must be implemented by subclasses")

In [9]:
class SoftmaxCrossEntropy(Criterion):
    """
    Softmax followed by cross-entropy loss, computed per sample.
    """

    def __init__(self):
        super(SoftmaxCrossEntropy, self).__init__()
        # Softmax probabilities from the most recent forward call.
        self.softmax = None

    def forward(self, x, y):
        """
        Argument:
            x (np.array): (batch size, 10) logits
            y (np.array): (batch size, 10) target distribution per row
        Return:
            out (np.array): (batch size, ) per-sample loss
        """
        self.logits = x
        self.labels = y

        # Subtract the row-wise maximum before exponentiating so np.exp
        # cannot overflow; this shift leaves the softmax unchanged.
        shifted = x - np.max(x, axis=1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        log_softmax = shifted - log_norm

        self.softmax = np.exp(log_softmax)
        self.loss = -np.sum(y * log_softmax, axis=1)
        return self.loss

    def derivative(self):
        """
        Gradient of the per-sample loss returned by forward w.r.t. the logits.

        Return:
            out (np.array): (batch size, 10)
        """
        # No division by the batch size here: forward returns one loss value
        # per sample, and the gradient of that value w.r.t. its own logits is
        # softmax - labels. Dividing here as well as in the layers' backward
        # pass would scale the gradients by 1/batch_size twice.
        return self.softmax - self.labels

In [10]:
class Linear():
    """
    Fully connected layer computing ``x @ W + b``.
    """

    def __init__(self, in_feature, out_feature, weight_init_fn, bias_init_fn):

        """
        Argument:
            W (np.array): (in feature, out feature)
            dW (np.array): (in feature, out feature)
            momentum_W (np.array): (in feature, out feature)

            b (np.array): (1, out feature)
            db (np.array): (1, out feature)
            momentum_B (np.array): (1, out feature)
        """

        self.W = weight_init_fn(in_feature, out_feature)
        self.b = bias_init_fn(out_feature)

        self.dW = np.zeros((in_feature, out_feature))
        self.db = np.zeros((1, out_feature))

        self.momentum_W = np.zeros((in_feature, out_feature))
        self.momentum_b = np.zeros((1, out_feature))

        # Input of the most recent forward call; backward needs it for dW.
        self.x = None

    def __call__(self, x):
        """
        Make the layer callable; equivalent to ``forward(x)``.
        """
        return self.forward(x)

    def forward(self, x):
        """
        Argument:
            x (np.array): (batch size, in feature)
        Return:
            out (np.array): (batch size, out feature)
        """
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, delta):

        """
        Store the batch-averaged parameter gradients in ``dW`` and ``db`` and
        return the gradient with respect to the layer input. ``forward`` must
        have been called first.

        Argument:
            delta (np.array): (batch size, out feature)
        Return:
            out (np.array): (batch size, in feature)
        """
        batch_size = self.x.shape[0]

        # Gradients of the loss w.r.t. weights and biases, averaged over the
        # batch.
        self.dW = np.dot(self.x.T, delta) / batch_size
        self.db = np.sum(delta, axis=0, keepdims=True) / batch_size

        # Gradient of the loss w.r.t. the input of this layer (not averaged:
        # it is propagated per sample to the previous layer).
        return np.dot(delta, self.W.T)

In [14]:
class MLP(object):
    """
    A simple multilayer perceptron: a stack of Linear layers, each followed
    by the activation at the same index in ``activations``.
    """

    def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn,
                 bias_init_fn, criterion, lr):
        """
        Argument:
            input_size (int): number of input features
            output_size (int): number of output features
            hiddens (sequence of int): sizes of the hidden layers
            activations (sequence): one activation object per linear layer
            weight_init_fn: called as weight_init_fn(in_feature, out_feature)
            bias_init_fn: called as bias_init_fn(out_feature)
            criterion: loss object providing forward/derivative
            lr (float): learning rate used by ``step``
        """

        # Don't change this -->
        self.train_mode = True
        self.nlayers = len(hiddens) + 1
        self.input_size = input_size
        self.output_size = output_size
        self.activations = activations
        self.criterion = criterion
        self.lr = lr
        # <---------------------

        # Consecutive pairs of sizes give the (in, out) dimensions of each
        # layer. Unpacking accepts any sequence for ``hiddens``, not only list.
        sizes = [input_size, *hiddens, output_size]
        self.linear_layers = [
            Linear(n_in, n_out, weight_init_fn, bias_init_fn)
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]

    def forward(self, x):
        """
        Argument:
            x (np.array): (batch size, input_size)
        Return:
            out (np.array): (batch size, output_size)
        """
        self.input = x
        for i, layer in enumerate(self.linear_layers):
            x = layer.forward(x)
            x = self.activations[i].forward(x)
        self.output = x
        return self.output

    def zero_grads(self):
        """
        Reset the stored gradients of every linear layer to zero, in place.
        """
        for layer in self.linear_layers:
            layer.dW.fill(0.0)
            layer.db.fill(0.0)

    def step(self):
        """
        Apply one plain gradient-descent update to all weights and biases.
        """
        for layer in self.linear_layers:
            layer.W -= self.lr * layer.dW
            layer.b -= self.lr * layer.db

    def backward(self, labels):
        """
        Backpropagate through the activation functions and linear layers.

        The starting gradient comes from ``self.criterion.derivative()``, so
        the criterion's forward must already have been evaluated on the
        current output (e.g. via ``total_loss``). ``labels`` is not read here.
        """
        delta = self.criterion.derivative()
        for i in reversed(range(self.nlayers)):
            # Chain rule: through the activation first, then the linear
            # layer, which also stores its own dW and db.
            delta = self.activations[i].derivative() * delta
            delta = self.linear_layers[i].backward(delta)

    def error(self, labels):
        """
        Count the samples whose arg-max prediction differs from the label.
        """
        return (np.argmax(self.output, axis=1) != np.argmax(labels, axis=1)).sum()

    def total_loss(self, labels):
        """
        Return the criterion's loss on the last output, summed over the batch.
        """
        return self.criterion(self.output, labels).sum()

    def __call__(self, x):
        """
        Make the network callable; equivalent to ``forward(x)``.
        """
        return self.forward(x)

    def train(self):
        """
        Switch the network to training mode.
        """
        self.train_mode = True

    def eval(self):
        """
        Switch the network to evaluation mode.
        """
        self.train_mode = False