<a href="https://colab.research.google.com/github/Vaibhavrathore1999/ML-building-blocks/blob/main/Cross_Entropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import numpy as np
import matplotlib.pyplot as plt

In [30]:
def entropy(p):
  p=np.array(p)
  p=p[p>0]
  return -np.sum(p*np.log2(p))

In [31]:
def safe_log2(x):
    return np.log2(x + 1e-12)  # small constant to avoid log(0)

def joint_entropy(joint_pxy):
    joint_pxy = np.array(joint_pxy)
    mask = joint_pxy > 0
    return -np.sum(joint_pxy[mask] * safe_log2(joint_pxy[mask]))

# Cross-Entropy:
“How many bits (or nats) you need to encode messages drawn from p, using your model q?”

In [32]:
def cross_entropy(p,q):
  p=np.array(p)
  q=np.array(q)
  p=p[p>0]
  q=q[q>0]
  return -np.sum(p*np.log2(q))

# KL Divergence = Cross-Entropy - Entropy

KL Divergence:
“How many extra bits you waste using your model
q instead of the optimal encoding from the oracle’s
p?

In [33]:
def KL_Divergence(p,q):
  p=np.array(p)
  q=np.array(q)
  p=p[p>0]
  q=q[q>0]
  return np.sum(p*np.log2(p/q))

In [34]:
def jensen_shannon(p, q):
    p = np.array(p)
    q = np.array(q)
    m = 0.5 * (p + q)
    return 0.5 * KL_Divergence(p, m) + 0.5 * KL_Divergence(q, m)

In [35]:
true_dist=[0.1,0.3,0.2,0.4]
pred_dist=[0.2,0.3,0.4,0.1]
print(entropy(true_dist))
print("Cross Entropy-->",cross_entropy(true_dist,pred_dist))
print("KL Divergence-->",KL_Divergence(true_dist,pred_dist))
print("Jensen Shannon Divergence",jensen_shannon(true_dist,pred_dist))

1.8464393446710154
Cross Entropy--> 2.3464393446710154
KL Divergence--> 0.5
Jensen Shannon Divergence 0.10628485095363911


In [36]:
joint_pxy = [
    [0.2, 0.3],   # P(X=0, Y=0) and P(X=0, Y=1)
    [0.1, 0.4]    # P(X=1, Y=0) and P(X=1, Y=1)
]
print("Joint Entropy-->",joint_entropy(joint_pxy))

Joint Entropy--> 1.8464393446652445


# Cross Entropy Loss
## Binary Cross Entropy Loss

In [37]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [38]:
def binary_cross_entropy_loss(logits,targets):
  logits=np.array(logits)
  preds=sigmoid(logits)
  targets=np.array(targets)
  return -np.mean(targets*np.log(preds+1e-12)+(1-targets)*np.log(1-preds+1e-12))

In [39]:
# For n examples
logits = np.array([0.2, 2.0, -1.0])      # model scores
targets = np.array([0, 1, 0])            # true labels (binary)

loss = binary_cross_entropy_loss(logits, targets)
print(f"Binary Cross-Entropy Loss: {loss:.4f}")

Binary Cross-Entropy Loss: 0.4128


## Multi - Class Cross Entropy Loss

In [40]:
def softmax(logits):
    e_logits = np.exp(logits - np.max(logits))  # Shape ---> (N,C)
    return e_logits / np.sum(e_logits, axis=-1, keepdims=True)      # Shape ---> (N,C)

In [41]:
def cross_entropy_loss(logits,targets):
  # Shape --> logits --> (N,C)
  # Shape targets -----> (N,)
  logits=np.array(logits)
  preds=softmax(logits)         # Shape preds --> (N,C)
  targets=np.array(targets)     # Shape targets --- > (N,)
  correct_log_probs = []
  for i in range(len(targets)):
      correct_class = targets[i]         # the correct class index for example i
      prob = preds[i][correct_class]         # get predicted probability for correct class
      loss = -np.log(prob)                   # negative log-likelihood
      correct_log_probs.append(loss)

  correct_log_probs = np.array(correct_log_probs)
  loss = np.sum(correct_log_probs) / len(targets)
  return loss


In [42]:
probs = [
    [0.7, 0.2, 0.1],   # sample 1
    [0.1, 0.8, 0.1],   # sample 2
    [0.2, 0.1, 0.7]    # sample 3
]
true_labels = [0, 1, 2]  # correct class for each sample
print("Cross Entropy Loss",cross_entropy_loss(probs,true_labels))

Cross Entropy Loss 0.7418752462591555
