In [64]:
import torch

## Creating Layers

In [65]:
class DenseLayer:
  """Fully connected layer computing ``inputs @ weights + biases``.

  Gradients are derived by hand in ``backward`` (autograd is not used) and
  stored on the instance as ``dweights``, ``dbiases`` and ``dinputs``.
  """

  # Layer initialization
  def __init__(self, n_inputs, n_neurons):
    """Create the layer parameters.

    Args:
      n_inputs: Number of features in each input sample.
      n_neurons: Number of output units of the layer.
    """
    # Small random weights (uniform in [0, 0.01)) and zero biases
    self.weights = 0.01 * torch.rand(n_inputs, n_neurons)
    self.biases = torch.zeros((1, n_neurons))

  # Forward pass
  def forward(self, inputs):
    """Compute the layer output and store it in ``self.output``.

    Args:
      inputs: Tensor of shape (batch, n_inputs). A single 1-D sample of
        shape (n_inputs,) is also accepted and treated as a batch of one.
    """
    # Promote a lone sample to a one-row batch so that the transposes and
    # matrix products in backward() see the 2-D shapes they need. The output
    # shape is unaffected: the (1, n_neurons) bias already broadcast it to 2-D.
    if inputs.dim() == 1:
      inputs = inputs.unsqueeze(0)
    # Record the inputs; they are needed for the weight gradient
    self.inputs = inputs
    # Calculate output values from inputs, weights and biases
    self.output = torch.matmul(inputs, self.weights) + self.biases

  # Backward pass
  def backward(self, dvalues):
    """Compute gradients of the loss w.r.t. weights, biases and inputs.

    Args:
      dvalues: Gradient of the loss w.r.t. this layer's output, with shape
        (batch, n_neurons); a 1-D (n_neurons,) gradient is treated as a
        batch of one.
    """
    if dvalues.dim() == 1:
      dvalues = dvalues.unsqueeze(0)
    # Gradients on parameters. torch.dot only accepts 1-D tensors, so the
    # matrix products have to go through torch.matmul.
    self.dweights = torch.matmul(self.inputs.T, dvalues)
    self.dbiases = torch.sum(dvalues, dim=0, keepdim=True)
    # Gradient on values, passed on to the previous layer
    self.dinputs = torch.matmul(dvalues, self.weights.T)

## Activation Functions

In [66]:
class Activation_ReLU:
  """Rectified linear unit: ``output = max(0, inputs)`` element-wise."""

  # Forward pass
  def forward(self, inputs):
    """Apply ReLU and store the result in ``self.output``.

    Args:
      inputs: Tensor of pre-activation values.
    """
    # Remember input values; backward() needs them to build the mask
    self.inputs = inputs
    self.output = torch.clamp(inputs, min=0)

  # Backward pass
  def backward(self, dvalues):
    """Store the gradient w.r.t. the inputs in ``self.dinputs``.

    Args:
      dvalues: Gradient of the loss w.r.t. this activation's output; must
        have the same shape as the inputs seen by forward().
    """
    # Work on a copy: the mask below is applied in place, and the caller's
    # gradient tensor must not be modified.
    self.dinputs = dvalues.clone()
    # Zero gradient where input values were not positive
    self.dinputs[self.inputs <= 0] = 0

In [67]:
class Activation_Softmax:
  """Row-wise softmax with a hand-written backward pass."""

  # Forward pass
  def forward(self, inputs):
    """Compute per-sample probabilities and store them in ``self.output``.

    Args:
      inputs: Tensor of shape (batch, n_classes).
    """
    # Subtract the row maximum before exponentiating for numerical stability;
    # this does not change the resulting probabilities.
    row_max = torch.max(inputs, dim=1, keepdim=True).values
    # Get unnormalized probabilities
    exp_values = torch.exp(inputs - row_max)
    # Normalize them for each sample
    self.output = exp_values / torch.sum(exp_values, dim=1, keepdim=True)

  # Backward pass
  def backward(self, dvalues):
    """Store the gradient w.r.t. the inputs in ``self.dinputs``.

    Args:
      dvalues: Gradient of the loss w.r.t. the softmax output, with the same
        shape as ``self.output``.
    """
    # Create uninitialized array; every row is filled in the loop below
    self.dinputs = torch.empty_like(dvalues)
    # Enumerate outputs and gradients
    for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):
      # Reshape the sample's probabilities into a column vector
      single_output = single_output.reshape(-1, 1)
      # Jacobian of the softmax for this sample: diag(s) - s s^T.
      # torch.dot only accepts 1-D tensors, so the outer product uses matmul.
      jacobian_matrix = torch.diagflat(single_output) - torch.matmul(single_output, single_output.T)
      # Calculate sample-wise gradient and add it to the array of sample
      # gradients. This must happen inside the loop, once per sample.
      self.dinputs[index] = torch.matmul(jacobian_matrix, single_dvalues)

In [68]:
class Activation_Sigmoid:
  """Element-wise logistic sigmoid activation.

  Results are published under two names: ``outputs`` / ``dinput`` (the names
  this class has always used) and ``output`` / ``dinputs`` (the names used by
  the other layers in this file), so it can be chained like any other layer.
  """

  # Forward pass
  def forward(self, inputs):
    """Apply the sigmoid and store the result.

    Args:
      inputs: Tensor of pre-activation values.
    """
    self.outputs = torch.sigmoid(inputs)
    # Alias matching the attribute name used by the other layers
    self.output = self.outputs

  # Backward pass
  def backward(self, dvalues):
    """Store the gradient w.r.t. the inputs.

    Args:
      dvalues: Gradient of the loss w.r.t. this activation's output.
    """
    # Derivative of the sigmoid expressed through its own output: s * (1 - s)
    self.dactivation = self.outputs * (1 - self.outputs)
    # Chain rule: multiply by the incoming gradient
    self.dinput = self.dactivation * dvalues
    # Alias matching the attribute name used by the other layers
    self.dinputs = self.dinput



## Loss

In [69]:
class Loss_CategoricalCrossentropy:
  """Categorical cross-entropy loss for sparse or one-hot encoded labels."""

  # Forward pass
  def forward(self, y_pred, y_true):
    """Return the mean negative log-likelihood of the correct classes.

    Args:
      y_pred: Predicted probabilities, shape (batch, n_classes).
      y_true: Either class indices of shape (batch,) or one-hot rows of
        shape (batch, n_classes).

    Returns:
      Scalar tensor with the mean loss over the batch.

    Raises:
      ValueError: If ``y_true`` is neither 1-D nor 2-D.
    """
    samples = len(y_pred)
    # Clip data to prevent log(0)
    # Clip both sides to not drag mean towards any value
    y_pred_clipped = torch.clip(y_pred, 1e-8, 1 - 1e-8)
    label_rank = len(y_true.shape)
    if label_rank == 1:
      # Sparse labels: pick the predicted probability of the true class
      correct_confidences = y_pred_clipped[range(samples), y_true]
    elif label_rank == 2:
      # One-hot labels: mask out everything except the true class
      correct_confidences = torch.sum(y_pred_clipped * y_true, dim=1)
    else:
      # Previously this fell through to an UnboundLocalError further down
      raise ValueError(
        "y_true must be 1-D class indices or 2-D one-hot rows, "
        f"got {label_rank} dimensions")
    log_loss = -torch.log(correct_confidences)
    return torch.mean(log_loss)

  # Backward pass
  def backward(self, dvalues, y_true):
    """Store the gradient of the loss w.r.t. the predictions in ``self.dinputs``.

    Args:
      dvalues: Predicted probabilities, shape (batch, n_classes).
      y_true: Class indices of shape (batch,) or one-hot rows of shape
        (batch, n_classes).
    """
    # Number of samples
    samples = len(dvalues)
    # Number of labels in every sample
    # We'll use the first sample to count them
    labels = len(dvalues[0])
    # If labels are sparse, turn them into one-hot vectors that live on the
    # same device and use the same dtype as the predictions
    if len(y_true.shape) == 1:
      y_true = torch.eye(labels, dtype=dvalues.dtype, device=dvalues.device)[y_true]
    # Calculate gradient and normalize it by the batch size
    self.dinputs = -y_true / dvalues / samples

In [70]:
class MSE:
  """Halved mean squared error between predictions and targets."""

  # Forward pass
  def forward(self, y_pred, y_true):
    """Compute ``mean(0.5 * (y_true - y_pred) ** 2)``.

    The value is also kept on the instance as ``self.outputs``.

    Args:
      y_pred: Predicted values.
      y_true: Target values, broadcastable against ``y_pred``.

    Returns:
      Scalar tensor holding the loss.
    """
    residual = y_true - y_pred
    halved_squares = 0.5 * residual ** 2
    self.outputs = torch.mean(halved_squares)
    return self.outputs

  # Backward pass
  def backward(self, y_pred, y_true):
    """Return the element-wise difference ``y_pred - y_true``.

    No averaging over elements is applied to the returned gradient.
    """
    gradient = y_pred - y_true
    return gradient

## Accuracy

In [71]:
class Accuracy:
  """Fraction of samples whose highest-scoring class equals the label."""

  def calculate(self, y_pred, y_true):
    """Return the classification accuracy as a scalar tensor.

    Args:
      y_pred: Scores or probabilities, shape (batch, n_classes).
      y_true: Class indices of shape (batch,), or one-hot rows of shape
        (batch, n_classes) which are reduced to indices first.
    """
    predicted_classes = torch.argmax(y_pred, dim=1)
    # One-hot labels are collapsed to class indices before comparing
    targets = torch.argmax(y_true, dim=1) if len(y_true.shape) == 2 else y_true
    hits = (predicted_classes == targets).float()
    return torch.mean(hits)

## Optimizers

In [72]:
class Optimizer_SGD:
  """Plain stochastic gradient descent with a fixed learning rate."""

  def __init__(self, learning_rate=0.01):
    """Store the step size used by every update.

    Args:
      learning_rate: Factor applied to the gradients before subtracting
        them from the parameters.
    """
    self.learning_rate = learning_rate

  def update_params(self, layer):
    """Move a layer's parameters one step against their gradients.

    Args:
      layer: Object exposing ``weights``/``dweights`` and
        ``biases``/``dbiases``; the parameters are updated in place.
    """
    step = self.learning_rate
    layer.weights -= step * layer.dweights
    layer.biases -= step * layer.dbiases

In [73]:
# Seed the global RNG so the random inputs below (and anything drawn from the
# same generator afterwards) are identical on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)
# Toy dataset: 4 random samples with 2 features each
x = torch.rand(4, 2)
# One integer class label (0 or 1) per row of x
y_true = torch.tensor([0, 1, 0, 1])

In [74]:
# Network: dense (2 -> 2) -> ReLU -> dense (2 -> 2) -> sigmoid
dense_layer1 = DenseLayer(n_inputs=2, n_neurons=2)
activation1 = Activation_ReLU()
output_layer = DenseLayer(n_inputs=2, n_neurons=2)
activation2 = Activation_Sigmoid()

# Loss function and optimizer (default learning rate)
mse = MSE()
optimization = Optimizer_SGD()

In [75]:
# Number of full passes the training loop makes over the rows of x
epochs = 20

In [None]:
# Train with per-sample SGD: one forward/backward/update cycle per row of x.
for e in range(epochs):
  for index in range(len(x)):
    # Slice instead of indexing so the sample keeps its batch dimension
    # (shape (1, n_features)); the layers' backward passes need 2-D tensors.
    row = x[index:index + 1]

    # forward pass
    dense_layer1.forward(row)
    activation1.forward(dense_layer1.output)
    output_layer.forward(activation1.output)
    activation2.forward(output_layer.output)

    # Predicted class, kept for inspection only. argmax is not differentiable,
    # so it must not be used for the loss or its gradient.
    pred = torch.argmax(activation2.outputs, axis=1)

    # error calculation against a one-hot target for this sample only
    target = torch.zeros_like(activation2.outputs)
    target[0, y_true[index]] = 1
    mse.forward(activation2.outputs, target)

    # backward pass
    loss_grad = mse.backward(activation2.outputs, target)
    activation2.backward(loss_grad)
    output_layer.backward(activation2.dinput)
    activation1.backward(output_layer.dinputs)
    dense_layer1.backward(activation1.dinputs)

    # update weights
    optimization.update_params(dense_layer1)
    optimization.update_params(output_layer)



