In [1]:
import numpy as np

In [14]:
from typing import Collection

def sigmoid(x: float)->float:
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: A scalar or any array-like of real numbers (list, tuple, ndarray).

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``; every value lies in ``[0, 1]``.
    """
    x_arr = np.asarray(x, dtype=float)
    # exp(-|x|) is always in (0, 1], so it cannot overflow, unlike exp(-x)
    # for large negative x.
    decay = np.exp(-np.abs(x_arr))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the algebraically identical
    # e^x / (1 + e^x) is used, with e^x == decay.
    result = np.where(x_arr >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # n-d arrays unchanged.
    return result[()]


def softmax_pure(x: Collection) -> Collection:
    '''
    Textbook softmax: exp(x) divided by the sum of exp(x).

    The exponentials are taken of the raw inputs, so large values overflow
    and the result can contain nan. Kept as the naive reference
    implementation.
    '''
    exponentials = np.exp(x)
    return exponentials / np.sum(exponentials)


def softmax(x: Collection, axis=None)->Collection:
    '''Numerically stable softmax.

    The maximum is subtracted before exponentiating. This is an identity
    transformation of the softmax formula that keeps every exponent <= 0,
    so np.exp cannot overflow.

    Args:
        x: Array-like of scores.
        axis: Axis (or axes) along which to normalise. The default, None,
            normalises over all elements of ``x``. Pass e.g. ``axis=-1`` to
            normalise each row of a batched 2-D input independently.

    Returns:
        ndarray of the same shape as ``x`` whose entries sum to 1 along
        ``axis``.
    '''
    x_arr = np.asarray(x)
    # keepdims=True keeps the reduced axes so the results broadcast back
    # against x_arr for any choice of axis.
    max_a = np.max(x_arr, axis=axis, keepdims=True)
    exp_a = np.exp(x_arr - max_a)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    return exp_a / sum_exp_a




# Spot-check sigmoid on a few scalar inputs.
print(sigmoid(10), sigmoid(0.5), sigmoid(0.2))

# l1 holds moderate scores; l2 contains a value large enough to overflow
# np.exp in the naive softmax.
l1 = [0.3, 2.9, 4.0]
l2 = [100000, 20, 30]

# Run both softmax variants on each input, side by side.
for header, scores in (("=== l1 ===", l1), ("=== l2 ===", l2)):
    print(header)
    print("softmax pure: ", softmax_pure(scores))
    print("softmax", softmax(scores))

0.9999546021312976 0.6224593312018546 0.549833997312478
=== l1 ===
softmax pure:  [0.01821127 0.24519181 0.73659691]
softmax [0.01821127 0.24519181 0.73659691]
=== l2 ===
softmax pure:  [nan  0.  0.]
softmax [1. 0. 0.]


  exp_a = np.exp(x)
  y = exp_a / sum_exp_a


In [3]:
def init_network():
    """Return a dict holding the fixed weights and biases of a 3-layer network.

    Keys are 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'; every value is a NumPy
    array with hard-coded entries. Shapes: W1 (2, 3), b1 (3,), W2 (3, 2),
    b2 (2,), W3 (2, 2), b3 (2,).
    """
    return {
        # Layer 1: 2 inputs -> 3 units.
        'W1': np.array([[0.1, 0.3, 0.5],
                        [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        # Layer 2: 3 units -> 2 units.
        'W2': np.array([[0.1, 0.2],
                        [0.3, 0.4],
                        [0.5, 0.6]]),
        'b2': np.array([0.1, 0.3]),
        # Layer 3: 2 units -> 2 outputs.
        'W3': np.array([[0.1, 0.2],
                        [0.3, 0.4]]),
        'b3': np.array([0.1, 0.3]),
    }

In [5]:
def forward(network, x):
    """Run a forward pass through the three layers stored in ``network``.

    Each layer computes ``sigmoid(np.dot(input, W) + b)``; the output of one
    layer is the input of the next. The shape of every layer's activation is
    printed as it is produced.

    Args:
        network: Mapping with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: Input passed to the first layer.

    Returns:
        The activation of the third layer.
    """
    # Look up all parameters before computing anything: weights, then biases.
    weights = [network[key] for key in ('W1', 'W2', 'W3')]
    biases = [network[key] for key in ('b1', 'b2', 'b3')]
    shape_labels = ("z1 shape: ", "z2 shape: ", "z3 shape: ")

    activation = x
    for label, weight, bias in zip(shape_labels, weights, biases):
        # Affine transform followed by the sigmoid activation function.
        activation = sigmoid(np.dot(activation, weight) + bias)
        print(label, activation.shape)

    return activation


# Build the fixed-weight network and push one 2-element sample through it.
network = init_network()
res = forward(network, [0.1, 0.2])
print("res: ", res)

z1 shape:  (3,)
z2 shape:  (2,)
z3 shape:  (2,)
res:  [0.59510458 0.67342385]


# MNIST

In [None]:
import sys, os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist

# Training images, training labels, test images, test labels.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

# Print the shape of each of the four arrays, in the order listed above.
for split in (x_train, t_train, x_test, t_test):
    print(split.shape)

(60000, 784)
(60000,)
(10000, 784)
(10000,)


In [20]:
# show image
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 为了导入父目录的文件而进行的设定
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image


def img_show(img):
    """Convert ``img`` to uint8, wrap it in a PIL image and call ``show()`` on it."""
    Image.fromarray(np.uint8(img)).show()

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

# Take the first training sample together with its label.
flat_img, label = x_train[0], t_train[0]
print(label)  # 5
print(flat_img.shape)  # (784,)

# Restore the flattened image to its original 28x28 layout before displaying it.
img = flat_img.reshape(28, 28)
print(img.shape)  # (28, 28)

img_show(img)


5
(784,)
(28, 28)
