In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.GK7GX5KEQ4F6UYO3P26ULGBQYHGQO7J4.gfortran-win_amd64.dll
c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


In [20]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate = 0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        
        self.weights1 = np.random.randn(self.hidden_size, self.input_size)
        self.biases1 = np.random.randn(self.hidden_size, 1)
        self.weights2 = np.random.randn(self.output_size, self.hidden_size)
        self.biases2 = np.random.randn(self.output_size, 1)

        self.prev_weights1_update = np.zeros_like(self.weights1)
        self.prev_biases1_update = np.zeros_like(self.biases1)
        self.prev_weights2_update = np.zeros_like(self.weights2)
        self.prev_biases2_update = np.zeros_like(self.biases2)

        self.weights1_delta = np.zeros_like(self.weights1)
        self.biases1_delta = np.zeros_like(self.biases1)
        self.weights2_delta = np.zeros_like(self.weights2)
        self.biases2_delta = np.zeros_like(self.biases2)

        self.prev_weights1_delta = np.zeros_like(self.weights1)
        self.prev_biases1_delta = np.zeros_like(self.biases1)
        self.prev_weights2_delta = np.zeros_like(self.weights2)
        self.prev_biases2_delta = np.zeros_like(self.biases2)
        self.prev_error = np.inf
        
    def forward(self, X):
        self.z1 = self.weights1 @ X.T + self.biases1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = self.weights2 @ self.a1 + self.biases2
        self.a2 = self.sigmoid(self.z2)
        return self.a2
    
    def sigmoid(self, s):
        return 1/(1+np.exp(-s))

    def sigmoid_derivative(self, s):
        return s * (1 - s)

    def backward(self, X, y):

        m = X.shape[0]
        y = y.reshape(-1, 1)
    
        self.error_delta = -2*(y.T - self.a2)
        self.z2_delta = self.sigmoid_derivative(self.a2) * self.error_delta
        self.z1_delta = self.sigmoid_derivative(self.a1) * (self.weights2.T @ self.z2_delta)

        self.weights2_delta = self.z2_delta @ self.a1.T / m
        self.biases2_delta = np.sum(self.z2_delta, axis=1, keepdims=True) / m
        self.weights1_delta = self.z1_delta @ X / m
        self.biases1_delta = np.sum(self.z1_delta, axis=1, keepdims=True) / m

    def mse(self, y_true, y_pred):
        return np.mean((y_true - y_pred)**2)

    def print_mse(self, X, y):
        print(f"MSE: {self.mse(y.T, self.forward(X))}")

    def update(self):
        weights1_update = self.weights1_delta * self.prev_weights1_update / (self.prev_weights1_delta - self.weights1_delta)
        biases1_update = self.biases1_delta * self.prev_biases1_update / (self.prev_biases1_delta - self.biases1_delta)
        weights2_update = self.weights2_delta * self.prev_weights2_update / (self.prev_weights2_delta - self.weights2_delta)
        biases2_update = self.biases2_delta * self.prev_biases2_update / (self.prev_biases2_delta - self.biases2_delta)

        self.weights1 -= weights1_update
        self.biases1 -= biases1_update
        self.weights2 -= weights2_update
        self.biases2 -= biases2_update

        self.prev_weights1_update = weights1_update
        self.prev_biases1_update = biases1_update
        self.prev_weights2_update = weights2_update
        self.prev_biases2_update = biases2_update

        self.prev_weights2_delta = self.weights2_delta
        self.prev_biases2_delta = self.biases2_delta
        self.prev_weights1_delta = self.weights1_delta
        self.prev_biases1_delta = self.biases1_delta

   
    def batch_gradient_descent(self, X, y):
        self.forward(X)
        self.backward(X, y)
        self.update()
        self.print_mse(X, y)

    def mini_batch_gradient_descent(self, X, y, batch_size):
        num_samples = X.shape[0]
        num_batches = int(np.ceil(num_samples / batch_size))
        indices = np.random.permutation(num_samples)
        X = X[indices]
        y = y[indices]
        
        for i in range(num_batches):
            start = i * batch_size
            end = min((i + 1) * batch_size, num_samples)
            
            X_batch = X[start:end]
            y_batch = y[start:end]
            
            y_pred = self.forward(X_batch)
            self.backward(X_batch, y_batch)
            self.update()

        self.print_mse(X, y)

    def stochastic_gradient_descent(self, X,y):
        self.mini_batch_gradient_descent(X, y, 1)

In [21]:
X = datasets.load_boston()['data']
y = datasets.load_boston()['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [22]:
self = NeuralNetwork(input_size = X_train.shape[1], hidden_size = 50, output_size = 1, learning_rate = 0.01)
for _ in range(10):
    self.batch_gradient_descent(X_train, y_train)

MSE: 594.2470429635945
MSE: nan
MSE: nan
MSE: nan
MSE: nan
MSE: nan
MSE: nan
MSE: nan
MSE: nan
MSE: nan


  weights1_update = self.learning_rate * self.weights1_delta * self.prev_weights1_update / (self.prev_weights1_delta - self.weights1_delta)
  biases1_update = self.learning_rate * self.biases1_delta * self.prev_biases1_update / (self.prev_biases1_delta - self.biases1_delta)
  weights2_update = self.learning_rate * self.weights2_delta * self.prev_weights2_update / (self.prev_weights2_delta - self.weights2_delta)
  biases2_update = self.learning_rate * self.biases2_delta * self.prev_biases2_update / (self.prev_biases2_delta - self.biases2_delta)
