# COURSE: A deep understanding of deep learning
## SECTION: Math prerequisites
### LECTURE: Entropy and cross-entropy
#### TEACHER: Mike X Cohen, sincxpress.com
##### COURSE URL: udemy.com/course/deeplearning_x/?couponCode=202401

In [9]:
# import libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F

# Reminder of entropy:

$$H(p) = -\sum_x p(x)\log(p(x))$$

In [5]:
# chance that the event occurs
p = 0.25

# this single-term expression is NOT the entropy:
# the complementary outcome (probability 1-p) is left out
H = -p * np.log(p)
print(f'Wrong entropy: {H}')

Wrong entropy: 0.34657359027997264


In [None]:
# If p = 0.25 is the probability of the event happening,
# then the probability of the event not happening is 1-p = 0.75.
# A distribution over "the event happens or it does not" must contain at least two probabilities,
# so x = [p, 1-p] in the case of a 2-state system.

In [6]:
# entropy computed properly: accumulate -p*log(p) over every outcome in x
x = [0.25, 0.75]

H = 0
for p in x:
  H += -p * np.log(p)  # equivalent to H -= p*np.log(p)

print(f'Correct entropy: {H}')

Correct entropy: 0.5623351446188083


In [7]:
# also correct, written out for N=2 events

# Binary entropy: a single distribution with two outcomes, p and 1-p.
# p is set explicitly here so this cell does not depend on whatever value
# an earlier cell's `for p in x` loop happened to leave behind in `p`.
p = .25
H = -( p*np.log(p) + (1-p)*np.log(1-p) )
print('Correct entropy: ' + str(H))

Correct entropy: 0.5623351446188083


# Cross-entropy

In [10]:
# p and q are each a probability distribution, so each must sum to 1
# (a softmax output satisfies this)
p = [1, 0]        # sum=1
q = [0.25, 0.75]  # sum=1

# accumulate -p[i]*log(q[i]) over the outcomes
H = 0
for i, p_i in enumerate(p):
  H -= p_i * np.log(q[i])

print(f'Cross entropy: {H}')

Cross entropy: 1.3862943611198906


In [11]:
# the same cross-entropy, spelled out term by term for N=2 events
# (uses the lists p and q defined in the cross-entropy cell above)
H = -( p[0]*np.log(q[0])
     + p[1]*np.log(q[1]) )
print(f'Correct entropy: {H}')

# with p = [1,0] the second term is multiplied by zero,
# so only -log(q[0]) remains
H = -np.log(q[0])
print(f'Manually simplified: {H}')

Correct entropy: 1.3862943611198906
Manually simplified: 1.3862943611198906


In [16]:
# now using pytorch (torch and F are imported in the import cell at the top)

# note: inputs must be floating-point Tensors.
# torch.tensor() with an explicit dtype is used instead of the legacy
# torch.Tensor() constructor; without the dtype, p = [1,0] would be
# inferred as an integer tensor.
q_tensor = torch.tensor(q, dtype=torch.float32)
p_tensor = torch.tensor(p, dtype=torch.float32)

# argument order is swapped relative to the hand-written formula (p first, q second):
#   first input is the model output (predicted probabilities, q)
#   second input is the training data (target probabilities, p)
# With the default reduction, binary_cross_entropy averages the per-element
# binary losses; for this two-outcome example that average equals the
# cross-entropy computed by hand above.
ce = F.binary_cross_entropy(q_tensor, p_tensor)
print(ce)

tensor(1.3863)


'2.0.1+cu118'