## 损失函数

### 均方误差

$
E\,\,=\,\,\frac{1}{2}\sum_k{\left( y_k-t_k \right) ^2}
$
（针对单个数据）

In [22]:
def mean_squared_error(y,t):
    """Return the sum-of-squares error E = 1/2 * sum_k (y_k - t_k)^2.

    Args:
        y: values produced by the model (anything NumPy can subtract
            element-wise from ``t``).
        t: target values matching ``y``.

    Returns:
        Half of the summed squared differences over all elements.
    """
    residual = y - t
    squared = residual ** 2
    return 0.5 * np.sum(squared)

### 交叉熵损失

$
E\,\,=\,\,-\sum_k{t_k\log y_k}
$
（针对单个数据）

In [23]:
def cross_entropy_error(y,t):
    """Return the cross-entropy error E = -sum_k t_k * log(y_k).

    Args:
        y: model outputs fed to the logarithm.
        t: target values weighting each log term.

    Returns:
        The negated sum of ``t * log(y + 1e-7)`` over all elements.
    """
    # Tiny offset so that an output of exactly 0 does not yield log(0) = -inf.
    epsilon = 1e-7
    log_outputs = np.log(y + epsilon)
    return -np.sum(t * log_outputs)

### mini-batch学习 

In [24]:
# 读取数据
import numpy as np
import gzip
import struct
# 读取 MNIST 图像文件
import os

def read_images(file):
    """Read a gzip-compressed MNIST image file into a 2-D uint8 array.

    The first 16 bytes are parsed as four big-endian unsigned 32-bit
    integers (magic number, image count, rows, columns); every remaining
    byte is treated as pixel data.

    Args:
        file: path (or file object) accepted by ``gzip.open``.

    Returns:
        Array of shape ``(num_images, rows * cols)`` with dtype ``uint8``,
        one flattened image per row.
    """
    with gzip.open(file, 'rb') as stream:
        header = stream.read(16)
        payload = stream.read()

    # The magic number is parsed but not checked.
    _magic, num_images, rows, cols = struct.unpack('>IIII', header)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(num_images, rows * cols)

# 读取 MNIST 标签文件
def read_labels(file):
    """Read a gzip-compressed MNIST label file into a 1-D uint8 array.

    The first 8 bytes are parsed as two big-endian unsigned 32-bit integers
    (magic number, label count); every remaining byte is one label.

    Args:
        file: path (or file object) accepted by ``gzip.open``.

    Returns:
        1-D ``uint8`` array holding all bytes that follow the header.
    """
    with gzip.open(file, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()

    # Header fields are parsed (which validates the header length) but the
    # returned array is built from the payload alone.
    _magic, _num_labels = struct.unpack('>II', header)
    return np.frombuffer(payload, dtype=np.uint8)

# File paths of the four gzip-compressed MNIST files (hard-coded absolute
# paths; adjust them to wherever the dataset lives on your machine).
train_images_file = 'F:/XFW-hub/Learning/train-images-idx3-ubyte.gz'
train_labels_file = 'F:/XFW-hub/Learning/train-labels-idx1-ubyte.gz'
test_images_file = 'F:/XFW-hub/Learning/t10k-images-idx3-ubyte.gz'
test_labels_file = 'F:/XFW-hub/Learning/t10k-labels-idx1-ubyte.gz'

# Load the training and test data: x_* hold the flattened images,
# t_* hold the raw uint8 labels as stored in the files.
x_train = read_images(train_images_file)
t_train = read_labels(train_labels_file)
x_test = read_images(test_images_file)
t_test = read_labels(test_labels_file)

#独热编码
def convert_labels_to_one_hot(labels, num_classes=10):
    """Convert a sequence of integer class labels to a one-hot matrix.

    Args:
        labels: sequence of integer class indices.
        num_classes: number of columns in the result (default 10).

    Returns:
        Float array of shape ``(len(labels), num_classes)`` that is 0
        everywhere except for a 1.0 at ``[i, labels[i]]`` in each row.
    """
    sample_count = len(labels)
    encoded = np.zeros((sample_count, num_classes))

    # Pair each row index with its label and set all of them in one
    # fancy-indexing assignment.
    row_ids = np.arange(sample_count)
    encoded[row_ids, labels] = 1.0
    return encoded
# Replace the integer label vectors with their one-hot encoded matrices
# (one row per sample, 10 columns).
t_train = convert_labels_to_one_hot(t_train,10)
t_test = convert_labels_to_one_hot(t_test,10)



In [25]:
# Draw one random mini-batch of 10 samples from the training set.
train_size = x_train.shape[0]
batch_size = 10
# np.random.choice samples with replacement by default, so the same index
# may appear more than once in a batch.
batch_mask = np.random.choice(train_size,batch_size)
# Use the same indices for inputs and targets so that they stay aligned.
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

### mini-batch版交叉熵误差的实现 

In [27]:
def cross_entropy_error(y,t):
    """Return the cross-entropy error averaged over a mini-batch.

    Args:
        y: model outputs; either one sample (1-D) or a batch with one
            sample per row.
        t: targets with the same layout as ``y``.

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of rows of ``y``.
    """
    if y.ndim == 1:
        # A single sample is promoted to a batch of one row so the same
        # averaging code handles both cases.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    sample_count = y.shape[0]
    # The 1e-7 offset keeps log() finite when an output is exactly 0.
    weighted_logs = t * np.log(y + 1e-7)
    return -np.sum(weighted_logs) / sample_count


### 梯度及梯度下降法的实现

In [None]:
def numerical_gradient(f,x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Elements are addressed as ``x[0] .. x[x.size - 1]``, i.e. this version
    is written for 1-D arrays. Each element is temporarily shifted in place
    by +h and -h (h = 1e-4) and restored afterwards.

    Args:
        f: callable that takes the array ``x`` and returns a scalar.
        x: array at which the gradient is evaluated.

    Returns:
        Array shaped like ``x`` whose i-th entry is
        ``(f(x + h*e_i) - f(x - h*e_i)) / (2*h)``.
    """
    step = 1e-4
    gradient = np.zeros_like(x)

    for i in range(x.size):
        saved = x[i]

        # Evaluate f on either side of the current coordinate.
        x[i] = saved + step
        f_forward = f(x)
        x[i] = saved - step
        f_backward = f(x)

        gradient[i] = (f_forward - f_backward) / (2 * step)
        # Put the original value back before moving to the next coordinate.
        x[i] = saved

    return gradient
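## Gradient for matrices: the second numerical_gradient defined further below uses an nditer iterator and replaces the 1-D version above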
def gradient_descent(f,init_x,lr=0.01,step_num=100):
    """Minimise ``f`` by plain gradient descent and return the final point.

    Args:
        f: callable that takes the parameter array and returns a scalar.
        init_x: starting point (array or array-like). It is copied, so the
            caller's object is left untouched.
        lr: learning rate, the factor applied to each gradient step.
        step_num: number of update steps to perform.

    Returns:
        A new float array holding the parameters after ``step_num`` updates.
    """
    # Work on a float copy. The update below is in place, so aliasing
    # init_x would silently overwrite the caller's starting point, and an
    # integer array could not accept the float update at all.
    x = np.array(init_x, dtype=float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is visited through an ``np.nditer`` multi-index,
    so arrays of any number of dimensions are handled. Each element is
    temporarily shifted in place by +h and -h (h = 1e-4) and then restored.

    Args:
        f: callable that takes the array ``x`` and returns a scalar.
        x: writable array at which the gradient is evaluated.

    Returns:
        Array shaped like ``x`` holding ``(f(x+h) - f(x-h)) / (2*h)`` for
        each element.
    """
    eps = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        pos = walker.multi_index
        original = x[pos]

        x[pos] = float(original) + eps
        f_plus = f(x)  # f(x+h)

        x[pos] = original - eps
        f_minus = f(x)  # f(x-h)

        gradient[pos] = (f_plus - f_minus) / (2 * eps)
        x[pos] = original  # restore the element before moving on

    return gradient


### 神经网络的梯度 

In [3]:
class simpleNet:
    """Single-layer network with a 2x3 weight matrix.

    Used to demonstrate taking the gradient of a loss with respect to the
    weights ``W``.
    """

    def __init__(self):
        # 2x3 weight matrix drawn from a standard normal distribution.
        self.W = np.random.randn(2,3)

    def predict(self,x):
        """Return the raw scores ``x . W`` (no activation applied)."""
        return np.dot(x,self.W)

    def loss(self,x,t):
        """Return the cross-entropy loss of the prediction for ``x``.

        Args:
            x: input data passed to ``predict``.
            t: target labels handed to ``cross_entropy_error``.

        Returns:
            Whatever ``cross_entropy_error`` returns for the softmax of the
            scores. ``softmax`` is not defined in this part of the file and
            is assumed to be available at call time.
        """
        z = self.predict(x)
        # softmax must be applied to the scores z, not to the raw input x;
        # otherwise the loss would not depend on W at all.
        y = softmax(z)
        return cross_entropy_error(y,t)


## 学习算法的实现        

### 随机梯度下降法(SGD)

step 1:mini-batch

step 2:计算梯度

step 3:更新参数

step 4:重复步骤1，2，3

### 两层神经网络的类

In [None]:



class TwolayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``. ``sigmoid`` and ``softmax`` are not defined in
    this part of the file and are assumed to be available at call time.
    """

    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        """Create the parameters.

        Args:
            input_size: number of input features (rows of ``W1``).
            hidden_size: number of hidden units.
            output_size: number of output classes (columns of ``W2``).
            weight_init_std: scale applied to the standard-normal initial
                weights.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        # W2 uses hidden_size for its row count so it matches the columns of W1.
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size,output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self,x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1,W2 = self.params['W1'],self.params['W2']
        b1,b2 = self.params['b1'],self.params['b2']

        a1 = np.dot(x,W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1,W2) + b2
        y = softmax(a2)

        return y

    def loss(self,x,t):
        """Return the cross-entropy error between ``predict(x)`` and ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y,t)

    def accuracy(self,x,t):
        """Return the fraction of rows whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``t`` is expected to hold one row per sample.
        """
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        t = np.argmax(t,axis=1)

        accuracy = np.sum(y == t)/float(x.shape[0])
        return accuracy

    def numerical_gradient(self,x,t):
        """Return numerical gradients of the loss for every parameter.

        Args:
            x: input batch.
            t: targets for the batch.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``; each
            value has the shape of the corresponding parameter.
        """
        # The argument W is ignored on purpose: the module-level
        # numerical_gradient perturbs the parameter array in place, and
        # self.loss reads that same array through self.params.
        loss_W = lambda W:self.loss(x,t)

        grads = {}
        for key in ('W1','b1','W2','b2'):
            grads[key] = numerical_gradient(loss_W,self.params[key])
        return grads

### mini-batch的实现

In [None]:
# Mini-batch training: loss history and hyperparameters.
train_loss_list = []
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwolayerNet(input_size=784,hidden_size=50,output_size=10)

for i in range(iters_num):
    # Step 1: pick a random mini-batch (same indices for inputs and targets).
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Step 2: gradients of the loss with respect to every parameter.
    grad = network.numerical_gradient(x_batch,t_batch)

    # Step 3: in-place gradient-descent update of each parameter.
    for key in ('W1','b1','W2','b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch, measured after the update.
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)

### 基于测试数据的评价

epoch:一个单位，表示所有训练数据均被使用过一次时的更新次数

In [None]:
# Mini-batch training with accuracy measured on the training and test sets
# once per epoch.
train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Iterations per epoch. Computed only after train_size and batch_size are
# assigned in this cell, so it does not depend on values left over from an
# earlier cell. Integer division keeps `i % iter_per_epoch == 0` firing once
# per epoch even when batch_size does not divide train_size evenly.
iter_per_epoch = max(train_size // batch_size,1)

network = TwolayerNet(input_size=784,hidden_size=50,output_size=10)

for i in range(iters_num):
    # Step 1: pick a random mini-batch (same indices for inputs and targets).
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Step 2: gradients of the loss with respect to every parameter.
    grad = network.numerical_gradient(x_batch,t_batch)

    # Step 3: in-place gradient-descent update of each parameter.
    for key in ('W1','b1','W2','b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch, measured after the update.
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)

    # At the start of every epoch, evaluate on the full training and test sets.
    if i%iter_per_epoch == 0 :
        train_acc = network.accuracy(x_train,t_train)
        test_acc = network.accuracy(x_test,t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc,test acc |"+str(train_acc)+","+str(test_acc))
