In [1]:
import torch.nn as nn
import torch

In [89]:
class NeuralNetwork(nn.Module):
    """
    Two-layer fully connected network trained with hand-written backpropagation.

    The network computes ``act1(act0(X @ w1) @ wn)``; inputs are batch-first,
    i.e. ``X`` has one sample per row.

    Attributes:
        input_size: An integer indicating number of input features.
        output_size: An integer indicating size of output.
        hidden_layer_size: An integer indicating size of hidden layer.

        w1: A tensor (input_size X hidden_layer_size) of first-layer weights.
        wn: A tensor (hidden_layer_size X output_size) of final-layer weights.

        activations: A sequence of two strings naming the activation of the
            hidden layer and of the output layer. Recognised names are
            'sigmoid', 'softmax', 'relu', 'tanh' and 'logit'; any other name
            (e.g. 'linear') means the identity.
        loss: A string naming the loss function ('MSE' or 'Hinge').
        optimizer: A string naming the optimization algorithm
            ('GradientDescent').

    Note:
        ``train`` here shadows ``nn.Module.train(mode)``; consequently
        ``nn.Module.eval()`` is not usable on this class.
    """
    def __init__(self, input_size, output_size, hidden_layer_size, activations, loss, optimizer):
        """
        Initializes Neural Network class attributes.

        Weights are drawn from a standard normal distribution and stored as
        plain tensors (not ``nn.Parameter``), since gradients are computed
        manually in ``backward``.
        """
        # Actually run nn.Module.__init__; a bare super(...) expression does nothing.
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layer_size = hidden_layer_size

        self.w1 = torch.randn(self.input_size, self.hidden_layer_size)
        self.wn = torch.randn(self.hidden_layer_size, self.output_size)

        self.activations = activations
        self.loss = loss
        self.optimizer = optimizer


    def forward(self, X):
        """
        Forward pass of the neural network.

        Args:
            X: Input tensor with one sample per row (n X input_size).

        Returns:
            A tuple ``(z, a)`` of two-element lists: ``z`` holds the
            pre-activations and ``a`` the activations of the hidden and
            output layers, in that order.
        """
        z = []
        a = []
        z.append(torch.matmul(X, self.w1))
        a.append(self.evaluateActivation(self.activations[0])(z[-1]))
        z.append(torch.matmul(a[-1], self.wn))
        a.append(self.evaluateActivation(self.activations[1])(z[-1]))
        return z, a


    def backward(self, X, y, z, a):
        """
        Backward Pass of the model.

        Args:
            X: Input tensor used in the forward pass.
            y: Target tensor with the same shape as the network output.
            z: Pre-activations returned by ``forward``.
            a: Activations returned by ``forward``.

        Returns:
            A list ``[dL_dW1, dL_dWn]`` with the loss gradients for ``w1``
            and ``wn``.
        """
        # Error signal at the output layer: dL/da_n * da_n/dz_n (elementwise).
        dL_da_n = self.evaluateLossDerivative()(a[-1], y)
        da_n_dz_n = self.evaluateActivationDerivative(self.activations[1])(z[-1])
        delta_n = dL_da_n * da_n_dz_n
        dL_dWn = torch.matmul(a[0].T, delta_n)

        # Propagate the error through wn and the hidden activation.
        da_1_dz_1 = self.evaluateActivationDerivative(self.activations[0])(z[0])
        delta_1 = torch.matmul(delta_n, self.wn.T) * da_1_dz_1
        dL_dW1 = torch.matmul(X.T, delta_1)
        return [dL_dW1, dL_dWn]


    def train(self, X, y, alpha, iterations):
        """
        Function to train the neural network.

        Args:
            X: Input tensor (n X input_size).
            y: Target tensor.
            alpha: Learning rate.
            iterations: Number of optimization steps.

        Returns:
            A tuple ``(funVals, ypred)`` as produced by the selected optimizer.

        Raises:
            ValueError: If ``self.optimizer`` is not a supported optimizer.
        """
        if self.optimizer == 'GradientDescent':
            return self.GradientDescent(X, y, alpha, iterations)
        raise ValueError(f"Unsupported optimizer: {self.optimizer!r}")


    def GradientDescent(self, X, y, alpha, iterations):
        """
        Full-batch gradient descent.

        Returns:
            A tuple ``(funVals, ypred)``: ``funVals`` is the list of loss
            values recorded before each update, and ``ypred`` is the network
            output from the last iteration's forward pass (computed before
            that iteration's weight update). ``ypred`` is an empty list when
            ``iterations`` is 0.
        """
        funVals = []
        ypred = []
        for _ in range(iterations):
            z, a = self.forward(X)
            funVals.append((self.evaluateLoss()(a[1], y)).item())
            dW = self.backward(X, y, z, a)
            self.w1 = self.w1 - alpha * dW[0]
            self.wn = self.wn - alpha * dW[1]
            ypred = a[-1]
        return funVals, ypred


    def evaluateActivation(self, activation):
        """
        Activation function

        Args:
            activation: Name of the activation.

        Returns:
            A callable mapping a tensor ``z`` to the activated tensor.
            Unrecognised names yield the identity function.
        """
        if activation == 'sigmoid':
            # torch.sigmoid is stable for large |z|; exp(z)/(1+exp(z)) gives NaN.
            return lambda z: torch.sigmoid(z)
        if activation == 'softmax':
            # Normalise over the feature axis (last dim), not across samples.
            return lambda z: torch.softmax(z, dim=-1)
        if activation == 'relu':
            return lambda z: torch.clamp(z, min=0)
        if activation == 'tanh':
            return lambda z: torch.tanh(z)
        if activation == 'logit':
            return lambda z: torch.log(z / (1 - z))
        return lambda z: z


    def evaluateActivationDerivative(self, activation):
        """
        Derivative of Activation Function

        Args:
            activation: Name of the activation.

        Returns:
            A callable mapping the pre-activation ``z`` to the elementwise
            derivative of the activation at ``z``. Unrecognised names are
            treated as the identity, whose derivative is 1.

        Raises:
            NotImplementedError: For 'softmax', whose Jacobian is not
                elementwise and cannot be used by ``backward``.
        """
        if activation == 'sigmoid':
            def sigmoid_derivative(z):
                s = torch.sigmoid(z)
                return s * (1 - s)
            return sigmoid_derivative
        if activation == 'softmax':
            raise NotImplementedError(
                "softmax derivative is not elementwise and is not supported "
                "by this backward pass"
            )
        if activation == 'relu':
            # 1 where z > 0, else 0 (subgradient 0 chosen at z == 0).
            return lambda z: (z > 0).to(z.dtype)
        if activation == 'tanh':
            return lambda z: 1 - torch.tanh(z) ** 2
        if activation == 'logit':
            # d/dz log(z / (1 - z)) = 1 / (z * (1 - z))
            return lambda z: 1 / (z * (1 - z))
        return lambda z: torch.ones_like(z)


    def evaluateLoss(self):
        """
        Loss Function

        Returns:
            A callable ``(ypred, y) -> loss`` for ``self.loss``. 'MSE' is
            ``(y - ypred)^T (y - ypred) / (2 n)``; 'Hinge' is the sum of
            ``max(0, 1 - y * ypred)`` over all elements.

        Raises:
            NotImplementedError: For 'CrossEntropyLoss'.
            ValueError: For any other unrecognised loss name.
        """
        if self.loss == 'MSE':
            return lambda ypred, y: torch.matmul((y - ypred).T, y - ypred) / (2 * y.shape[0])
        if self.loss == 'Hinge':
            def hinge(ypred, y):
                # Elementwise margin; no transpose, which would broadcast to (n, n).
                return torch.sum(torch.clamp(1 - y * ypred, min=0))
            return hinge
        if self.loss == 'CrossEntropyLoss':
            raise NotImplementedError("CrossEntropyLoss is not implemented")
        raise ValueError(f"Unsupported loss: {self.loss!r}")


    def evaluateLossDerivative(self):
        """
        Loss function Derivative

        Returns:
            A callable ``(ypred, y) -> dL/dypred`` for ``self.loss``.

        Raises:
            NotImplementedError: For 'CrossEntropyLoss'.
            ValueError: For any other unrecognised loss name.
        """
        if self.loss == 'MSE':
            # d/dypred of (y - ypred)^2 / (2n) is (ypred - y) / n.
            return lambda ypred, y: (ypred - y) / len(y)
        if self.loss == 'Hinge':
            def hinge_derivative(ypred, y):
                # Subgradient: -y where the margin is violated, 0 elsewhere.
                active = (y * ypred < 1).to(ypred.dtype)
                return -y * active
            return hinge_derivative
        if self.loss == 'CrossEntropyLoss':
            raise NotImplementedError("CrossEntropyLoss derivative is not implemented")
        raise ValueError(f"Unsupported loss: {self.loss!r}")

In [90]:
# Synthetic regression data drawn from a standard normal distribution:
# a feature matrix X and a single-column target y with matching row counts.
n_samples, n_features = 1000, 10
X = torch.randn(n_samples, n_features)
y = torch.randn(n_samples, 1)

In [91]:
# Build the network: ReLU hidden layer of width 10, identity ('linear') output,
# MSE loss, trained by plain gradient descent.
model = NeuralNetwork(
    input_size=X.shape[1],
    output_size=1,
    hidden_layer_size=10,
    activations=['relu', 'linear'],
    loss='MSE',
    optimizer='GradientDescent',
)

In [92]:
# Run 200 training steps with learning rate 1e-05; keep the per-step loss
# history and the predictions returned by the optimizer.
funVals, ypred = model.train(X, y, alpha=1e-05, iterations=200)

In [99]:
# Show the dimensions of the predictions returned by training.
print(ypred.size())

torch.Size([1000, 1])


In [100]:
# Show the dimensions of the targets for comparison with the predictions.
print(y.size())

torch.Size([1000, 1])


In [103]:
# Recompute the halved mean squared error by hand from the residuals.
residual = ypred - y
torch.matmul(residual.T, residual) / (2 * len(y))

tensor([[14.2406]])