# Naive Softmax


In [1]:
import numpy as np

In [2]:
def naive_softmax(x):
  """Return the softmax of ``x`` computed directly from its definition.

  The maximum is not subtracted before exponentiating, so ``np.exp`` can
  overflow for large inputs.

  Args:
    x: array-like of numbers.

  Returns:
    ``np.exp(x)`` divided by its sum along the first axis.
  """
  # Exponentiate once and reuse the result for numerator and denominator.
  exps = np.exp(np.array(x))
  return exps / exps.sum(axis=0)

In [3]:
ans = naive_softmax([2,3,5,2,4,0,-1,2])
# Compare with a tolerance: rounding means the probabilities need not sum to exactly 1.
assert np.abs(sum(ans) - 1) < 1e-10

# Normalized Softmax

In [4]:
def normal_softmax(x):
  """Return the softmax of ``x`` after shifting by its maximum.

  Subtracting ``np.max(x)`` makes every exponent non-positive, so ``np.exp``
  cannot overflow; the shift cancels in the ratio.

  Args:
    x: non-empty array-like of numbers (``np.max`` raises on empty input).

  Returns:
    ``np.exp(x - max(x))`` divided by its sum along the first axis.
  """
  x = np.array(x)
  shifted = np.exp(x - np.max(x))
  return shifted / shifted.sum(axis=0)

In [5]:
# Exercise the normalized implementation (not the naive one) on the same input.
ans_norm = normal_softmax([2,3,5,2,4,0,-1,2])
# Tolerance-based checks: the two implementations round differently.
assert np.abs(sum(ans_norm) - 1) < 1e-10
assert np.all(np.abs(ans - ans_norm) < 1e-10)

# Online Softmax

- Calculating $\max_i x_i$ in one pass, as usual
- **Calculating the normalization term in the same pass!**

In [9]:
def online_softmax(x):
  """Softmax whose maximum and normalizer are accumulated in one loop.

  While walking the input, the running maximum is updated and the running
  sum of exponentials is rescaled to it, so the normalizer is ready when
  the loop ends. The final maximum and normalizer are printed.

  Args:
    x: non-empty array-like of numbers.

  Returns:
    ``np.exp(x - max)`` divided by the accumulated normalizer.
  """
  values = np.array(x)
  running_max = -np.inf
  previous_max = -np.inf
  normalizer = 0
  for value in values:
    previous_max = running_max
    running_max = max(running_max, value)
    # Rescale the sum gathered so far to the new maximum, then add this element's term.
    rescaled = normalizer*np.exp(previous_max - running_max)
    normalizer = rescaled + np.exp(value - running_max)
  # Sanity check: the running maximum must equal the true maximum.
  assert (running_max == max(values))
  print(running_max, normalizer)
  return np.exp(values - running_max)/normalizer

In [10]:
ans_online = online_softmax([2,3,5,2,4,0,-1,2])
assert np.abs(sum(ans_online) - 1) < 1e-10
assert np.all(np.abs(ans - ans_online) < 1e-10)

5 1.6617926286873983


# Softmax operation



In [17]:
class MaxNorm:
  """Pair of a maximum ``m`` and a normalizer ``d`` expressed relative to ``m``.

  Adding two instances keeps the larger ``m`` and rescales both ``d`` values
  to it, so partial results can be merged with ``+``.
  """

  def __init__(self, m, d):
    self.m = m
    self.d = d

  def __repr__(self):
    return f"{type(self).__name__}(m={self.m!r}, d={self.d!r})"

  def __add__(self, mn):
    """Merge with another ``MaxNorm`` and return the combined pair."""
    # Compute the merged maximum once; both rescale factors depend on it.
    m = max(self.m, mn.m)
    d = self.d*np.exp(self.m - m) + mn.d*np.exp(mn.m - m)
    return MaxNorm(m, d)



In [20]:
v = np.array([2,3,5,2,4,0,-1,2])
# Each element starts as its own (max, normalizer) pair: exp(v_i - v_i) == 1.
x = MaxNorm(v[0],1)
for value in v[1:]:
  x = x + MaxNorm(value,1)

# Compare the normalizer with a tolerance; it is an accumulated floating-point sum.
assert x.m == 5 and np.abs(x.d - 1.6617926286873983) < 1e-10
