In [2]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

In [3]:
epsilon = 1e-6

## MSE Loss

In [182]:
def mse_loss(y_pred, y_true):
    # y_pred: n x c
    # y_true: n x c
    return ((y_true-y_pred)**2).mean()

In [183]:
# veryfication:
loss = nn.MSELoss()
input = torch.randn(3, 5, requires_grad=False)
target = torch.randn(3, 5, requires_grad=False)
output = loss(input, target)

In [184]:
input_np = input.numpy()
target_np = target.numpy()
output_np = mse_loss(input_np, target_np)

In [185]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## BCE Loss

In [186]:
def bce_loss(y_pred, y_true):
    # y_pred = n x 2 (float range (0~1))
    # y_true = n x 2 (one hot integer)
    y_pred = np.clip(y_pred, epsilon, 1-epsilon)
    return -(y_true * np.log(y_pred) + (1-y_true)*np.log(1-y_pred)).mean()

In [187]:
m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, 2, requires_grad=False)
target = torch.rand(3, 2, requires_grad=False)
output = loss(m(input), target)

In [188]:
input_np = input.numpy()
target_np = target.numpy()
output_np = bce_loss(sigmoid(input_np), target_np)

In [189]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## NLL Loss

In [190]:
def nllloss(y_pred, y_true):
    # y_pred: n x c float array after log softmax
    # y_true: n integer array
    indices = np.arange(len(y_true)).astype(int)
    return -y_pred[indices, y_true.astype(int), ...].mean()

In [191]:
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
input = m(torch.randn(3, 5, requires_grad=False))
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)

In [192]:
input_np = input.numpy()
target_np = target.numpy()
output_np = nllloss(input_np, target_np)

In [193]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## CrossEntropy Loss

In [73]:
def cross_entropy(y_pred, y_true):
    # y_pred: n x c float array
    # y_true: n integer array
    y_pred = np.log(softmax(y_pred, axis=1))
    indices = np.arange(len(y_true)).astype(int)
    return -y_pred[indices, y_true.astype(int), ...].mean()

In [258]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=False)
target = torch.empty(3, dtype=torch.long, requires_grad=False).random_(5)
output = loss(input, target)

In [259]:
input_np = input.numpy()
target_np = target.numpy()
output_np = cross_entropy(input_np, target_np)

In [260]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## KLDiv Loss

In [261]:
def kldivloss(y_pred, y_true):
    # y_pred: n x c after log softmax
    # y_true: n x c after softmax
    return (y_true * (np.log(y_true) - y_pred)).mean()

In [262]:
kl_loss = nn.KLDivLoss(reduction="mean")
input = F.log_softmax(torch.randn(3, 5, requires_grad=False), dim=1)
target = F.softmax(torch.rand(3, 5), dim=1)
output = kl_loss(input, target)

In [263]:
input_np = input.numpy()
target_np = target.numpy()
output_np = kldivloss(input_np, target_np)

In [264]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

# Focal Loss

In [20]:
def focalloss(y_pred, y_true, alpha=0.25, gamma=2.0):
    """
    Compute the focal loss between true labels and predictions.
    
    Args:
        y_true: Ground truth labels, shape (batch_size, num_classes).
        y_pred: Predicted probabilities (after softmax), shape (batch_size, num_classes).
        alpha: Balancing factor for positive vs negative examples (default is 0.25).
        gamma: Focusing parameter to emphasize harder examples (default is 2.0).
        eps: Small value to avoid log(0) (default is 1e-9).
    
    Returns:
        Focal loss value.
    """
    y_pred = np.clip(y_pred, epsilon, 1.-epsilon)
    cross_entropy = -y_true*np.log(y_pred)
    loss = alpha * (1-y_pred)**gamma * cross_entropy
    return np.mean(np.sum(loss, axis=-1))

In [21]:
y_true = np.array([[1, 0], [0, 1]])
y_pred = np.array([[0.9, 0.1], [0.2, 0.8]])

In [22]:
loss = focalloss(y_true, y_pred)
print("Focal Loss: ", loss)

Focal Loss:  0.5180806097608864


# Constrastive Loss

In [29]:
def cosinesim(u, v):
    return np.dot(u,v)/(np.linalg.norm(u) * np.linalg.norm(v))

In [36]:
def contrastloss(anchor, positive, negative, temperature=0.07):
    """
    Compute the Contrastive Loss

    Args:
        anchor: Embedding of the anchor (D)
        positive: Embedding of the positive pair (D)
        negative: Embedding of the positive pair (N, D)
        temperature: Temperature parameter

    Returns:
        contrastive loss value
    """
    sim_pos = cosinesim(anchor, positive) / temperature
    sim_neg = np.einsum("d, nd -> n", anchor, negative) / temperature
    sim_all = np.concatenate([[sim_pos], sim_neg])
    loss = sim_pos - np.log(np.sum(np.exp(sim_all)))
    return loss

In [37]:
# Example usage
anchor = np.array([0.2, 0.8])
positive = np.array([0.25, 0.75])
negatives = np.array([[0.1, 0.9], [0.9, 0.1]])

loss = contrastloss(anchor, positive, negatives)
print("contrastive loss: ", loss)

contrastive loss:  -0.025128660938365854


# InfoNCE Loss

In [85]:
def infonceloss(img_emb, text_emb, emb_size=64, temperature=0.07):
    """
    Compute the Contrastive Loss

    Args:
        img_emb: Embedding of the vision (B, D)
        text_emb: Embedding of the texts (B, D)
        temperature: Temperature parameter

    Returns:
        loss value
    """
    n = img_emb.shape[0]
    w_i = np.random.randn(img_emb.shape[1], emb_size)
    w_t = np.random.randn(text_emb.shape[1], emb_size)
    
    img_emb = np.dot(img_emb, w_i)
    text_emb = np.dot(text_emb, w_t)
    print(img_emb.shape, text_emb.shape)
    
    img_emb = img_emb / np.linalg.norm(img_emb, axis=1, keepdims=True)
    text_emb = text_emb / np.linalg.norm(text_emb, axis=1, keepdims=True)
    
    sim_matrix = np.exp(np.matmul(img_emb, text_emb.T)/temperature)

    labels = np.arange(n)
    sum_4i = np.sum(sim_matrix, axis=1, keepdims=True)
    loss_i = -np.log(sim_matrix[labels, labels] / sum_4i)
    sum_4j = np.sum(sim_matrix, axis=0, keepdims=True)
    loss_t = -np.log(sim_matrix[labels, labels] / sum_4j)
    loss = loss_i + loss_t/2
    return np.mean(loss)

In [86]:
img_emb = np.random.rand(2, 93)
text_emb = np.random.rand(2, 102)
loss = infonceloss(img_emb, text_emb)
print("infonce loss: ", loss)

(2, 64) (2, 64)
infonce loss:  1.7646046014351506
