In [1]:
import numpy as np
import math

In [2]:
def batch_gradient_descent(X, Y, learning_rate=0.1, epochs=100, random_seed=1):
    """Fit a linear unit (no activation) with batch gradient descent.

    In every epoch the delta-rule update of each sample is summed and the
    weights are changed once, after the whole data set has been visited.

    Parameters:
        X: indexable collection of samples; ``len(X[0])`` is taken as the
            number of input features.
        Y: target value for each sample, indexed like ``X``.
        learning_rate: factor applied to every per-sample update.
        epochs: number of full passes over ``X``.
        random_seed: value passed to ``np.random.seed`` before the initial
            weights are drawn (this reseeds NumPy's global generator).

    Returns:
        1-D array of ``len(X[0]) + 1`` weights; index 0 is the bias weight.
    """
    np.random.seed(random_seed)

    # One weight per input feature plus a leading bias weight, drawn from N(0, 1).
    n_weights = len(X[0]) + 1
    weights = np.random.normal(0, 1, (n_weights,))

    for _ in range(epochs):
        # Sum of the per-sample updates for this epoch.
        accumulated = np.zeros(weights.shape)

        for idx, sample in enumerate(X):
            # Prepend a constant 1 so weights[0] acts as the bias.
            features = np.append([1], sample)

            # Linear output of the unit for this sample.
            prediction = weights.dot(features)
            error = Y[idx] - prediction

            accumulated += learning_rate * error * features

        # Apply the whole batch's update in one step.
        weights += accumulated

    return weights

In [3]:
# AND
# Truth table over {-1, +1}: the target is +1 only when both inputs are +1.
X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])
Y = np.array([-1, -1, -1, 1])

# Train the linear unit with the function's default hyper-parameters.
and_weights = batch_gradient_descent(X, Y)

print(and_weights)

[-0.5  0.5  0.5]


In [4]:
# A AND (NOT B)
# Truth table over {-1, +1}: the target is +1 only for A = +1, B = -1.
X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])
Y = np.array([-1, -1, 1, -1])

weights = batch_gradient_descent(X, Y, learning_rate=0.1, epochs=100)

print(weights)

# Prepend a column of ones to X so the bias weight takes part in the product,
# then show the unit's raw (linear) output for every row of the truth table.
X_with_bias = np.insert(X, 0, 1, axis=1)
np.dot(X_with_bias, weights)

[-0.5  0.5 -0.5]


array([-0.5, -1.5,  0.5, -0.5])

In [5]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

    The result is computed in a numerically stable way: the exponential is
    only ever taken of a non-positive number, so large-magnitude negative
    inputs yield 0.0 instead of raising ``OverflowError`` from ``math.exp``.
    """
    if x >= 0:
        # exp(-x) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For negative x use the equivalent form e**x / (1 + e**x); exp(x) < 1.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def batch_gradient_descent_sigmoid(X, Y, learning_rate=1, epochs=1000, random_seed=1):
    """Fit a single sigmoid unit with batch gradient descent.

    Each sample contributes ``learning_rate * (target - out) * out * (1 - out)``
    times its bias-extended input; the contributions are summed over the
    whole data set and applied to the weights once per epoch.

    Parameters:
        X: indexable collection of samples; ``len(X[0])`` is taken as the
            number of input features.
        Y: target value for each sample, indexed like ``X``.
        learning_rate: factor applied to every per-sample update.
        epochs: number of full passes over ``X``.
        random_seed: value passed to ``np.random.seed`` before the initial
            weights are drawn (this reseeds NumPy's global generator).

    Returns:
        1-D array of ``len(X[0]) + 1`` weights; index 0 is the bias weight.
    """
    np.random.seed(random_seed)

    # One weight per input feature plus a leading bias weight, drawn from N(0, 1).
    n_weights = len(X[0]) + 1
    weights = np.random.normal(0, 1, (n_weights,))

    for _ in range(epochs):
        # Sum of the per-sample updates for this epoch.
        accumulated = np.zeros(weights.shape)

        for idx, sample in enumerate(X):
            # Prepend a constant 1 so weights[0] acts as the bias.
            features = np.append([1], sample)

            # Squashed output of the unit for this sample.
            activation = sigmoid(weights.dot(features))
            error = Y[idx] - activation

            # activation * (1 - activation) is the sigmoid's derivative.
            accumulated += learning_rate * error * activation * (1 - activation) * features

        # Apply the whole batch's update in one step.
        weights += accumulated

    return weights

In [6]:
# AND
# Truth table over {-1, +1}: the target is +1 only when both inputs are +1.
# NOTE(review): a sigmoid's output lies strictly between 0 and 1, so the -1
# targets can never be matched exactly -- confirm whether 0/1 targets were meant.
X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])
Y = np.array([-1, -1, -1, 1])

and_weights = batch_gradient_descent_sigmoid(X, Y, learning_rate=50, epochs=10000)

print(and_weights)

# Prepend a column of ones to X for the bias, then show the unit's sigmoid
# output for every row of the truth table.
X_with_bias = np.insert(X, 0, 1, axis=1)
[sigmoid(net) for net in np.dot(X_with_bias, and_weights)]

[-17.68666264  15.14602335  17.48261067]


[1.4071626682154263e-22,
 2.1554944080603084e-07,
 2.013784680660647e-09,
 0.9999996758215538]

In [7]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

    NOTE(review): a function with this name is also defined in an earlier
    cell; this definition shadows it once the cell has run.

    The result is computed in a numerically stable way: the exponential is
    only ever taken of a non-positive number, so large-magnitude negative
    inputs yield 0.0 instead of raising ``OverflowError`` from ``math.exp``.
    """
    if x >= 0:
        # exp(-x) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For negative x use the equivalent form e**x / (1 + e**x); exp(x) < 1.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def stochastic_gradient_descent_sigmoid(X, Y, learning_rate=1, epochs=1000, random_seed=1):
    """Fit a single sigmoid unit with stochastic (per-sample) gradient descent.

    Unlike the batch variant, the weights are updated immediately after each
    sample, so every later sample in the same epoch is evaluated with the
    already-adjusted weights. Samples are visited in their given order in
    every epoch (no shuffling).

    Parameters:
        X: indexable collection of samples; ``len(X[0])`` is taken as the
            number of input features.
        Y: target value for each sample, indexed like ``X``.
        learning_rate: factor applied to every per-sample update.
        epochs: number of full passes over ``X``.
        random_seed: value passed to ``np.random.seed`` before the initial
            weights are drawn (this reseeds NumPy's global generator).

    Returns:
        1-D array of ``len(X[0]) + 1`` weights; index 0 is the bias weight.
    """
    np.random.seed(random_seed)

    # initialize perceptron weights with small random numbers
    weights = np.random.normal(0, 1, (len(X[0]) + 1,))  # do not forget the bias

    for _ in range(epochs):
        for i in range(len(X)):
            # prepend a 1 for the bias
            input_with_bias = np.append([1], X[i])

            # compute output with the current (already updated) weights
            output = sigmoid(weights.dot(input_with_bias))

            # update the weights right away -- this per-sample step is what
            # distinguishes stochastic from batch gradient descent
            weights += learning_rate * (Y[i] - output) * output * (1 - output) * input_with_bias

    return weights