In [11]:
from typing import Callable
import numpy as np
import matplotlib.pyplot as plt


def show_function(data, func: Callable):
    """Plot ``func(data)`` against ``data`` on a newly created figure.

    :param data: x-values; also passed to ``func`` as its only argument
    :param func: callable applied to ``data`` to obtain the y-values
    :return: None; the curve is drawn on a fresh figure created with
        ``figsize=(3, 2.7)`` and the ``'constrained'`` layout
    """
    figure_kwargs = {'figsize': (3, 2.7), 'layout': 'constrained'}
    _figure, axes = plt.subplots(**figure_kwargs)
    y_values = func(data)
    axes.plot(data, y_values)


### 均方误差 mean squared error

In [4]:
def mean_squared_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Despite the name, the sum of squares is divided by the constant 2,
    not by the number of elements.

    :param y: first operand of the difference (the demo passes predictions)
    :param t: second operand of the difference (the demo passes targets)
    :return: ``sum((y - t) ** 2) / 2``
    """
    squared_diff = np.square(y - t)
    total = np.sum(squared_diff)
    return total / 2

# Target vector with a single 1 at index 2.
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
# Scores whose largest entry (0.6) is also at index 2.
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
mean_squared_error(y, t)

0.09750000000000003

### 交叉熵误差 cross entropy error

In [43]:
def cross_entropy_error(p: np.ndarray, q: np.ndarray):
    """
    Cross-entropy of a predicted distribution ``q`` against a target ``p``.

    :param p: target (true) data, e.g. a one-hot vector
    :param q: predicted data; expected to be broadcast-compatible with ``p``
    :return: ``-sum(p * log(q + 1e-7))`` taken over all elements
    """
    # The small offset keeps log() finite when an entry of q is exactly 0.
    epsilon = 1e-7
    return -np.sum(p * np.log(q + epsilon))

# Target with a single 1 at index 2; q assigns 0.6 to that same index.
p = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
q = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
cross_entropy_error(p, q)

0.510825457099338

In [56]:
# mini-batch one-hot版本 
def cross_entropy_error(p: np.ndarray, q: np.ndarray):
    """
    Mini-batch cross-entropy for targets laid out like the predictions.

    :param p: target (true) data, one row per sample
    :param q: predicted data; a 1-D ``q`` is treated as a batch of one
    :return: sum of ``-p * log(q + 1e-7)`` divided by the number of rows of ``p``
    """
    single_sample = q.ndim == 1
    if single_sample:
        # Promote a lone sample to a one-row batch so shape[0] is the batch size.
        p = np.reshape(p, (1, p.size))
        q = np.reshape(q, (1, q.size))

    n_samples = p.shape[0]
    log_q = np.log(q + 1e-7)
    return -np.sum(p * log_q) / n_samples

# A batch of two identical samples: the same target/prediction pair, repeated.
p = np.array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])
q = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
              [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]])
cross_entropy_error(p, q)

0.510825457099338

In [67]:
#非one-hot版本 
def cross_entropy_error(p: np.ndarray, q: np.ndarray):
    """
    Loop-based cross-entropy for targets given as class indices.

    The code takes the log of entries of ``p`` and uses ``q`` as column
    indices, so here ``p`` holds the probabilities being scored and ``q``
    the class labels.

    :param p: per-sample probabilities, one row per sample,
        e.g. [[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]]
    :param q: class index for each row of ``p``, e.g. [2, 1, 0]
    :return: average over rows of ``-log(p[row, q[row]] + 1e-7)``
    """
    n_rows = p.shape[0]
    eps = 1e-7
    label_weight = 1  # weight applied to the labelled class (probability 1)

    running_total = 0
    for row in range(n_rows):
        picked = p[row, q[row]]
        running_total = running_total - np.sum(label_weight * np.log(picked + eps))

    return running_total / n_rows

# Each row of p is one sample's probabilities; q names the column scored in each row.
p = np.array([[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]])
q = np.array([2, 1, 0])
cross_entropy_error(p, q)

0.2283928773141796

In [79]:
#非one-hot mini-batch版本
def cross_entropy_error(p: np.ndarray, q: np.ndarray):
    """
    Vectorised cross-entropy for targets given as class indices.

    The code takes the log of entries of ``p`` and uses ``q`` as column
    indices, so here ``p`` holds the probabilities being scored and ``q``
    the class labels.

    :param p: per-sample probabilities, one row per sample,
        e.g. [[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]]
    :param q: class index for each row of ``p``, e.g. [2, 1, 0]
    :return: sum of ``-log(p[i, q[i]] + 1e-7)`` divided by the number of rows
    """
    n_rows = p.shape[0]
    row_index = np.arange(n_rows)
    # Integer-array indexing pairs row i with column q[i].
    picked = p[row_index, q]
    return -np.sum(np.log(picked + 1e-7)) / n_rows

# Same inputs as the loop-based cell; the second tuple element shows the entries picked by the indexing.
p = np.array([[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]])
q = np.array([2, 1, 0])
cross_entropy_error(p, q), p[np.arange(3), q] # advanced indexing: the first index array selects the rows, the second selects the column within each selected row

(0.2283928773141796, array([0.7, 0.8, 0.9]))

In [84]:
# 结合
def cross_entropy_error(p: np.ndarray, q: np.ndarray):
    """
    Cross-entropy accepting either class-index or one-hot targets.

    The code takes the log of entries of ``p`` and uses ``q`` (or its
    per-row argmax) as column indices, so here ``p`` holds the
    probabilities being scored and ``q`` the targets.

    :param p: probabilities, one row per sample,
        e.g. [[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]];
        a 1-D array is treated as a single-sample batch
    :param q: targets, either class indices (e.g. [2, 1, 0]) or an array
        with as many elements as ``p`` (e.g. one-hot rows), which is
        reduced to class indices with ``argmax(axis=1)``
    :return: sum of ``-log(p[i, label_i] + 1e-7)`` divided by the batch size
    """
    if p.ndim == 1:
        # Promote a single sample to a one-row batch; otherwise shape[0]
        # would be the class count and argmax(axis=1) would fail on 1-D q.
        p = p.reshape(1, p.size)
        q = q.reshape(1, q.size)

    batch_size = p.shape[0]
    epsilon = 1e-7
    if q.size == p.size:
        # Same element count as p: interpret q as one-hot rows.
        q = q.argmax(axis=1)
    return -np.sum(np.log(p[np.arange(batch_size), q] + epsilon)) / batch_size

# Case 1: targets given as class indices (one index per row of p1).
p1 = np.array([[0.1, 0.2, 0.7], [0.1, 0.8, 0.1], [0.9, 0.05, 0.05]])
q1 = np.array([2, 1, 0])

# Case 2: targets given as one-hot rows with the same shape as p2.
p2 = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
              [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]])
q2 = np.array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])
cross_entropy_error(p1, q1), cross_entropy_error(p2, q2)

(0.2283928773141796, 0.510825457099338)