In [2]:
from sklearn.datasets import fetch_openml
import numpy as np
np.random.seed(42)

In [3]:
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

class DenseLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros((1, output_size))
    
    def forward(self, input):
        self.input = input
        return np.dot(input, self.weights) + self.biases
    
    def backward(self, output_gradient, learning_rate):
        weights_gradient = np.dot(self.input.T, output_gradient)
        input_gradient = np.dot(output_gradient, self.weights.T)
        self.weights -= learning_rate * weights_gradient
        self.biases -= learning_rate * np.sum(output_gradient, axis=0, keepdims=True)
        return input_gradient


In [None]:
class MultiLayerNetwork:
    def __init__(self, layer_sizes):
        self.layers = []
        for i in range(len(layer_sizes) - 1):
            self.layers.append(DenseLayer(layer_sizes[i], layer_sizes[i+1]))

    def forward(self, input_data):
        self.activations = [input_data]
        x = input_data
        for layer in self.layers:
            x = layer.forward(x)
            x = relu(x)  # Apply ReLU activation function
            self.activations.append(x)
        return x

    def backward(self, y_true, learning_rate):
        # Assume the last layer activation is softmax and loss is cross-entropy
        last_activation = self.activations[-1]
        y_pred = softmax(last_activation)
        output_gradient = (y_pred - y_true) / y_pred.shape[0]  # Cross-entropy gradient

        for i in reversed(range(len(self.layers))):
            output_gradient = relu_derivative(self.activations[i+1]) * output_gradient
            output_gradient = self.layers[i].backward(output_gradient, learning_rate)

    def train(self, x_train, y_train, learning_rate, epochs):
        for epoch in range(epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward(x)
                self.backward(y, learning_rate)
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {self.compute_loss(x_train, y_train)}')

    def compute_loss(self, x_train, y_train):
        loss = 0
        for x, y in zip(x_train, y_train):
            y_pred = softmax(self.forward(x))
            loss += -np.sum(y * np.log(y_pred))
        return loss / len(x_train)
    
def softmax(logits):
    """Compute softmax values for each set of scores in logits."""
    exp_scores = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

In [5]:
# 加载MNIST数据
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"]

In [14]:
input_size = X.shape[1]
output_size = 1

layer_sizes = [input_size, 1024, 1024, 1024,
               1024, 1024, output_size]  # 六个隐藏层加输出层

# 生成数据（示例）

X_np = X.values
y_np = y.values
print(X_np.shape, y_np.shape)
network = MultiLayerNetwork(layer_sizes)
network.train(X_np, y_np, learning_rate=0.01, epochs=100)

(70000, 784) (70000,)


UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('float64'), dtype('<U1')) -> None