In [83]:
import numpy as np

### definitions
**underflow:** numbers near zero are rounded to zero

**overflow:** large-magnitude numbers are approximated as $\infty$ or $-\infty$

**softmax:**

$$\text{softmax}(\boldsymbol{x})_{i} = \frac{\text{exp}(x_{i})}{\sum_{j=1}^{n} \text{exp}(x_{j})}$$

Softmax must be stabilized against underflow and overflow.

In [84]:
def softmax(x):
    """Naive softmax: exp(x_i) / sum_j exp(x_j), with no numerical stabilization.

    Prints the denominator as a side effect so its value can be inspected.

    Parameters
    ----------
    x : array_like
        Input scores; all elements are normalized together.

    Returns
    -------
    numpy.ndarray
        Element-wise exp(x) divided by the sum of all exponentials. If the
        denominator evaluates to 0 or inf, the entries come out as nan.
    """
    exponentials = np.exp(x)
    normalizer = np.sum(exponentials)
    print("\t(denominator =", normalizer, ")")
    return exponentials / normalizer

In [85]:
# benign input: moderate values, so the naive softmax behaves well
x = [4.0, 6.0, 8.0]
print("x =", x, "\n")
naive_probs = softmax(x)
print("softmax(x) =", naive_probs, "\n")
naive_total = np.sum(softmax(x))
print("sum(softmax(x))=", naive_total, "\n")

x = [4.0, 6.0, 8.0] 

	(denominator = 3438.98493057 )
softmax(x) = [ 0.01587624  0.11731043  0.86681333] 

	(denominator = 3438.98493057 )
sum(softmax(x))= 1.0 



In [86]:
# underflow example: every exp(x_i) rounds to 0, so the denominator is 0
# and the naive softmax evaluates 0/0 (nan)
x = [-4e100, -6e100, -8e100]
print("x =", x, "\n")
naive_probs = softmax(x)
print("softmax(x) =", naive_probs, "\n")
naive_total = np.sum(softmax(x))
print("sum(softmax(x))=", naive_total, "\n")

x = [-4e+100, -6e+100, -8e+100] 

	(denominator = 0.0 )
softmax(x) = [ nan  nan  nan] 

	(denominator = 0.0 )
sum(softmax(x))= nan 



  app.launch_new_instance()


In [87]:
# overflow example: every exp(x_i) becomes inf, so the denominator is inf
# and the naive softmax evaluates inf/inf (nan)
x = [4e100, 6e100, 8e100]
print("x =", x, "\n")
naive_probs = softmax(x)
print("softmax(x) =", naive_probs, "\n")
naive_total = np.sum(softmax(x))
print("sum(softmax(x))=", naive_total, "\n")

x = [4e+100, 6e+100, 8e+100] 

	(denominator = inf )
softmax(x) = [ nan  nan  nan] 

	(denominator = inf )
sum(softmax(x))= nan 



  from ipykernel import kernelapp as app
  app.launch_new_instance()
  app.launch_new_instance()


**improved softmax**

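$$\text{softmax}(\boldsymbol{z})_{i},\quad \boldsymbol{z} = \boldsymbol{x} - \text{max}_{j}\,x_{j}$$

Subtracting the same constant from every component leaves softmax unchanged, because the common factor $\text{exp}(-\text{max}_{j}\,x_{j})$ cancels between numerator and denominator. After the shift the largest component of $\boldsymbol{z}$ is $0$, so no exponential can overflow, and the denominator contains the term $\text{exp}(0) = 1$, so it cannot underflow to zero.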

In [88]:
def improved_softmax(x, axis=None):
    """Numerically stabilized softmax: softmax(x - max(x)).

    Shifting by the maximum makes the largest exponent exactly 0, so exp()
    cannot overflow and the denominator is at least 1 (no division by zero).
    The shift does not change the mathematical result.

    Prints the maximum, the shifted input and the denominator as a side
    effect so the intermediate values can be inspected.

    Parameters
    ----------
    x : array_like
        Input scores.
    axis : int or None, optional
        Axis along which to normalize. The default (None) normalizes over all
        elements together, which is the original behavior for a single
        vector. Pass e.g. ``axis=-1`` to normalize each row of a batch
        independently.

    Returns
    -------
    numpy.ndarray
        Softmax probabilities with the same shape as ``x``; they sum to 1
        along ``axis``.

    Notes
    -----
    Non-finite inputs are not handled: if the maximum is +inf, the shift
    computes inf - inf and the result contains nan.
    """
    x = np.asarray(x)
    # keepdims is only needed for per-axis reductions so the result broadcasts
    # against x; with axis=None the reductions stay scalars, exactly as before.
    keepdims = axis is not None

    x_max = np.max(x, axis=axis, keepdims=keepdims)
    print("\t(max =", x_max, ")")

    z = x - x_max
    print("\t(z =", z, ")")

    # Compute the exponentials once and reuse them for numerator and denominator.
    exp_z = np.exp(z)
    denominator = np.sum(exp_z, axis=axis, keepdims=keepdims)
    print("\t(denominator =", denominator, ")")

    return exp_z / denominator

In [89]:
# benign input: the stabilized softmax matches the naive result here
x = [4.0, 6.0, 8.0]
print("x =", x, "\n")
stable_probs = improved_softmax(x)
print("improved_softmax(x) =", stable_probs, "\n")
stable_total = np.sum(improved_softmax(x))
print("sum(improved_softmax(x))=", stable_total, "\n")

x = [4.0, 6.0, 8.0] 

	(max = 8.0 )
	(z = [-4. -2.  0.] )
	(denominator = 1.15365092213 )
improved_softmax(x) = [ 0.01587624  0.11731043  0.86681333] 

	(max = 8.0 )
	(z = [-4. -2.  0.] )
	(denominator = 1.15365092213 )
sum(improved_softmax(x))= 1.0 



In [90]:
# underflow example: after subtracting the max, the largest exponent is 0,
# so the denominator is at least 1 and the result is well-defined
x = [-4e100, -6e100, -8e100]
print("x =", x, "\n")
stable_probs = improved_softmax(x)
print("improved_softmax(x) =", stable_probs, "\n")
stable_total = np.sum(improved_softmax(x))
print("sum(improved_softmax(x))=", stable_total, "\n")

x = [-4e+100, -6e+100, -8e+100] 

	(max = -4e+100 )
	(z = [  0.00000000e+000  -2.00000000e+100  -4.00000000e+100] )
	(denominator = 1.0 )
improved_softmax(x) = [ 1.  0.  0.] 

	(max = -4e+100 )
	(z = [  0.00000000e+000  -2.00000000e+100  -4.00000000e+100] )
	(denominator = 1.0 )
sum(improved_softmax(x))= 1.0 



In [91]:
# overflow example: after subtracting the max, no exponent exceeds 0,
# so exp() cannot overflow and the result is well-defined
x = [4e100, 6e100, 8e100]
print("x =", x, "\n")
stable_probs = improved_softmax(x)
print("improved_softmax(x) =", stable_probs, "\n")
stable_total = np.sum(improved_softmax(x))
print("sum(improved_softmax(x))=", stable_total, "\n")

x = [4e+100, 6e+100, 8e+100] 

	(max = 8e+100 )
	(z = [ -4.00000000e+100  -2.00000000e+100   0.00000000e+000] )
	(denominator = 1.0 )
improved_softmax(x) = [ 0.  0.  1.] 

	(max = 8e+100 )
	(z = [ -4.00000000e+100  -2.00000000e+100   0.00000000e+000] )
	(denominator = 1.0 )
sum(improved_softmax(x))= 1.0 

