In [17]:
import numpy as np
import random

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x)*(1-sigmoid(x))

def cost_derivative(y_pred, y_true):
    return (y_pred - y_true)

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def mse_loss_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size


In [3]:
class NumpyNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.W1 = np.random.randn(self.input_size, self.hidden_size)
        self.W2 = np.random.randn(self.hidden_size, self.output_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.b2 = np.zeros((1, self.output_size))

    def forward(self, X):
        self.z1 = X @ self.W1 + self.b1
        self.a1 = sigmoid(self.z1)
        self.z2 = self.a1 @ self.W2 + self.b2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output):
        self.dz2 = mse_loss_derivative(y, output) * sigmoid_prime(output)
        self.dW2 = self.a1.T @ self.dz2
        self.db2 = np.sum(self.dz2, axis=0)

        self.dz1 = self.dz2 @ self.W2.T * sigmoid_prime(self.a1)
        self.dW1 = X.T @ self.dz1
        self.db1 = np.sum(self.dz1, axis=0)

        self.W1 -= learning_rate * self.dW1
        self.W2 -= learning_rate * self.dW2
        self.b1 -= learning_rate * self.db1
        self.b2 -= learning_rate * self.db2

    def train(self, batch, epochs, learning_rate):
        for _ in range(epochs):
            for X, y in batch:
                output = self.forward(X)
                self.backward(X, y, output)


In [4]:
class BookNN:
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(x, y) for x, y in zip(sizes[:-1], sizes[1:])]        
    
    def forward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(a @ w + b)
        return a
    
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = activation @ w + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        print('>>', activations)
            
        # backward pass
        delta = cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = activations[-2].transpose() @ delta
        print(delta.shape)
       
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = (self.weights[-l+1].transpose() @ delta) * sp
            print(delta.shape)
            nabla_b[-l] = delta
            nabla_w[-l] = (activations[-l-1].transpose() @ delta)
        return (nabla_b, nabla_w)
                
    def update_batch(self, batch):
        for x, y in batch:
            print(x, y)
            (nabla_b, nabla_w) = self.backprop(x, y)
            print(nabla_b, nabla_w)

    def sgd(self, training_data, epochs, mini_batch_size):
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, len(training_data), mini_batch_size)]
            for batch in mini_batches:
                self.update_batch(batch)

In [5]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

input_size = 2
hidden_size = 3
output_size = 1
epochs = 10000
learning_rate = 0.1

nn = NumpyNN(input_size, hidden_size, output_size)
nn.forward(X)
# nn.train([(X, y)], epochs, learning_rate)
# for x in X:
#     print(f"Input: {x}, Output: {nn.forward(x)}")



# X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# y = np.array([[0], [1], [1], [0]])

# training_data = [(x, y) for x, y in zip(X, y)]


nn = BookNN([2, 3, 1])
nn.backprop(X[0], y[0])
# nn.sgd(training_data, epochs, 10)
# nn.forward(X[0])

    

>> [array([0, 0]), array([[0.18499073, 0.18499073, 0.18499073],
       [0.37670336, 0.37670336, 0.37670336],
       [0.59167034, 0.59167034, 0.59167034]]), array([[0.15797142],
       [0.18650638],
       [0.22304275]])]
(3, 1)
(3, 3)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 2)

In [None]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

input_size = 2
hidden_size = 3
output_size = 1
epochs = 10000
learning_rate = 0.1

nn = BookNN(input_size, hidden_size, output_size)
nn.train([(X, y)], epochs, learning_rate)

print("Neural network predictions:")
for x in X:
    print(f"Input: {x}, Output: {nn.forward(x)}")

In [None]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

w = np.random.randn(2, 3)
print(w.shape)
# a = np.array(([0, 1], [0, 1]))
# w @ a

# # W1 = np.random.randn(2, 3)
# W1 = np.array([[0, 1, 2], [1, 2, 3]])
# W2 = np.random.randn(3, 1)
# b1 = np.zeros((1, 3))
# b2 = np.zeros((1, 1))

# print(X)
# print(W1)
# print(b1.shape)
# X @ W1 + b1

# np.array([X, X] @ W1 + b1)


In [52]:
eta = 0.1

input_size = 1
hidden_size = 2
output_size = 1

# w1 = np.random.randn(input_size, hidden_size)
# w2 = np.random.randn(hidden_size, output_size)

x = np.array([[0.], [1.], [2.], [3.]])
y = np.array([[0.], [1.], [0.], [1.]])
w1 = np.array([[0., 1.]])
b1 = np.array([[0.5, 0.5]])
w2 = np.array([[1.], [0.]])
b2 = np.array([[1.]])

n = 100
for i in range(n):
    z1 = x @ w1 + b1
    a1 = relu(z1)
    z2 = a1 @ w2 + b2
    a2 = relu(z2)
    loss = mse_loss(1, a2)
    if i % 10 == 0:
        print(f'loss: {loss} pred: {a2}')

    dz2 = mse_loss_derivative(y, a2) * relu_derivative(a2)
    dw2 = a1.T @ dz2
    db2 = np.sum(dz2, axis=0)

    dz1 = dz2 @ w2.T * relu_derivative(a1)
    dw1 = x.T @ dz1
    db1 = np.sum(dz1, axis=0)

    w1 -= eta * dw1
    w2 -= eta * dw2
    b1 -= eta * db1
    b2 -= eta * db2

    


loss: 0.25 pred: [[1.5]
 [1.5]
 [1.5]
 [1.5]]
loss: 0.5363399301789903 pred: [[0.69479686]
 [0.42325822]
 [0.15171958]
 [0.        ]]
loss: 0.22008356902695186 pred: [[0.53663716]
 [0.53280499]
 [0.52897283]
 [0.52514067]]
loss: 0.23084519226880315 pred: [[0.36610805]
 [0.47738841]
 [0.5940725 ]
 [0.71075659]]
loss: 0.26322668346092504 pred: [[0.29069779]
 [0.44018556]
 [0.58967333]
 [0.7391611 ]]
loss: 0.2724737595339327 pred: [[0.2531872 ]
 [0.42854846]
 [0.60390972]
 [0.77927098]]
loss: 0.2841171872693692 pred: [[0.22943437]
 [0.41619361]
 [0.60295284]
 [0.78971207]]
loss: 0.29292572531694105 pred: [[0.21462225]
 [0.40715567]
 [0.5996891 ]
 [0.79222253]]
loss: 0.2953277833552502 pred: [[0.20880568]
 [0.40470029]
 [0.60059491]
 [0.79648952]]
loss: 0.2981379178275505 pred: [[0.20368101]
 [0.40181669]
 [0.60007811]
 [0.79833953]]
