In [None]:
import numpy as np
def SoftMax_forward(input, normalization=False):
  """Run softmax over a 1-D sequence and keep every intermediate layer.

  Args:
    input: sequence (list or array) of numbers.
    normalization: when True, the maximum of `input` is subtracted from
      every element before exponentiation, so the largest exponent is 1.

  Returns:
    dict with the keys, in this order:
      'input' - the values fed to exp (the shifted ones if normalization is on),
      'exp'   - element-wise exponent of 'input',
      'sum'   - the sum of 'exp', repeated once per element,
      'div'   - 1 / 'sum',
      'out'   - 'exp' * 'div', i.e. the softmax result.
  """
  layer_in = np.array(input)
  if normalization:
    # Repeat the maximum once per element, then shift the input by it.
    peak = np.array([np.max(input)] * len(input))
    shifted = input - peak
    layer_in = np.array(shifted)
  exponents = np.exp(layer_in)
  # The total is stored as a vector (one copy per element), not as a scalar.
  total = np.array([np.sum(exponents)] * len(exponents))
  inverse_total = 1 / total
  return {
    'input': layer_in,
    'exp': exponents,
    'sum': total,
    'div': inverse_total,
    'out': exponents * inverse_total,
  }

In [None]:
#"loss" function required for backward propagation
#here it returns an array with 1 at the position of the largest value and 0 everywhere else
def loss_fn(array):
  """Return a one-hot float vector marking the largest entry of `array`.

  The result has len(array) elements: 1 at the index given by np.argmax
  (the first maximum on ties) and 0 everywhere else.
  """
  one_hot = np.zeros(len(array))
  one_hot[np.argmax(array)] = 1
  return one_hot

In [None]:
def SoftMax_backward(input, loss_fn, normalization=False):
  """Backpropagate through the softmax graph produced by SoftMax_forward.

  Args:
    input: sequence of numbers passed on to SoftMax_forward.
    loss_fn: callable that receives the softmax output and returns the
      upstream gradient (one value per output element).
    normalization: forwarded to SoftMax_forward.

  Returns:
    list with the gradient at every step of the backward pass:
      [0] upstream gradient returned by loss_fn,
      [1] gradient flowing into the shared 1/sum node (scalar),
      [2] gradient with respect to 'sum',
      [3] gradient reaching 'exp' directly from the final multiplication,
      [4] total gradient with respect to 'exp',
      [5] gradient with respect to the input.
  """
  outputs = SoftMax_forward(input, normalization) #applying forward propagation
  upstream = loss_fn(outputs['out']) #gradient arriving at the softmax output
  bw_outputs = [upstream] #list of outputs
  # out = exp * div and the same 1/sum value feeds every element, so its
  # gradient is the SUM of the per-element contributions. (np.max only gives
  # the same number when the upstream gradient is one-hot and non-negative.)
  bw_outputs.append(np.sum(outputs['exp'] * upstream)) #backward from out to 1/x
  bw_outputs.append(bw_outputs[-1] * (-1 / (outputs['sum'] * outputs['sum']))) #backward from 1/x to copy
  # Use the stored 'div' instead of recomputing it as out / exp, which turns
  # into 0 / 0 (NaN) as soon as an exponent underflows to zero.
  bw_outputs.append(outputs['div'] * upstream) #backward to copy from out
  bw_outputs.append(bw_outputs[-1] + bw_outputs[-2]) #backward from copy to exp
  bw_outputs.append(bw_outputs[-1] * outputs['exp']) #backward to input, d exp(x)/dx = exp(x)
  return bw_outputs #list of backwards for each layer





#Let's test these functions

In [None]:
import torch
import torch.nn as nn
# Sample input shared by the hand-written and the PyTorch implementations.
a = [1., 2., 3.]
# Forward pass of the NumPy softmax; `f` is the dict of intermediate layers.
f = SoftMax_forward(a)
# Backward pass; `loss_fn` supplies the gradient fed in at the softmax output.
b = SoftMax_backward(a, loss_fn)
# Rebind `a` as a tensor with requires_grad=True so autograd can be compared below.
# This must stay after the two calls above, which receive the plain list.
a = torch.tensor(a, requires_grad=True)

In [None]:
# Compare the NumPy forward pass with PyTorch's softmax on the same input.
print('Mine softmax forward output:', f['out'])
# Call the module instance instead of .forward() so PyTorch's regular call path (including hooks) is used.
print('Torch softmax forward output:', nn.Softmax(dim=0)(a).tolist())

Mine softmax forward output: [0.09003057 0.24472847 0.66524096]
Torch softmax forward output: [0.09003057330846786, 0.2447284758090973, 0.6652409434318542]


Loss computed by loss function

In [None]:
# One-hot vector marking the largest softmax output; displayed as the cell result.
loss = loss_fn(f['out'])
loss

array([0., 0., 1.])

In [None]:
# Turn the NumPy one-hot vector into a tensor so it can be passed to backward().
loss = torch.tensor(loss)
# Autograd adds to `a.grad` on every backward call; clear it first so that
# re-running this cell prints the same gradient instead of an accumulated one.
a.grad = None
# NOTE(review): `res` is kept because the original cell bound it; no visible cell reads it.
res = nn.functional.softmax(a, dim=0).backward(loss)
print('Mine softmax backward output:', b[-1])
print('Torch softmax backward output:', a.grad.tolist())

Mine softmax backward output: [-0.05989202 -0.1628034   0.22269543]
Torch softmax backward output: [-0.05989202484488487, -0.16280339658260345, 0.22269542515277863]


In [None]:
import numpy as np
# Scratch check: length of the longest string in a list.
# NOTE(review): this rebinds the global `a`, which the softmax cells above use for
# the torch tensor. Re-running those cells after this one would pick up this list
# instead; consider a different variable name.
a = ['str', 'erere', 'aqeqeaa', 'a']
len(max(a, key=len))

7