In [36]:
import numpy as np

In [37]:
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    Args:
        x: Scalar, sequence or ndarray of real values.

    Returns:
        The element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input is promoted so the result is floating point.
        x = x.astype(float)

    # exp(-|x|) always lies in (0, 1], so it cannot overflow, unlike exp(-x)
    # for large negative x.
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Unwrap 0-d results so scalar input still yields a scalar.
    return out[()] if out.ndim == 0 else out

In [38]:
def sigmoid_derivative(x):
    """Derivative of the sigmoid evaluated at ``x``: s * (1 - s), s = sigmoid(x)."""
    activation = sigmoid(x)
    return activation * (1 - activation)

In [39]:
def initialize_parameters(input_size, hidden_size, output_size, seed=0, scale=0.01):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and multiplied by
    ``scale``; biases start at zero.

    Args:
        input_size: Number of input features (columns of ``W1``).
        hidden_size: Number of hidden units (rows of ``W1``, columns of ``W2``).
        output_size: Number of output units (rows of ``W2``).
        seed: Seed passed to ``np.random.RandomState``. The default ``0``
            yields the same draws as seeding the global generator with 0.
        scale: Multiplier applied to the normal draws (default ``0.01``).

    Returns:
        dict with keys ``"W1"`` (hidden_size, input_size), ``"b1"``
        (hidden_size, 1), ``"W2"`` (output_size, hidden_size) and ``"b2"``
        (output_size, 1).
    """
    # A private generator keeps initialisation reproducible without reseeding
    # the global NumPy RNG that the rest of the notebook may be using.
    rng = np.random.RandomState(seed)
    W1 = rng.randn(hidden_size, input_size) * scale
    b1 = np.zeros((hidden_size, 1))
    W2 = rng.randn(output_size, hidden_size) * scale
    b2 = np.zeros((output_size, 1))
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}

In [40]:
def forward_propagation(X, parameters):
    """Run the forward pass of the two-layer sigmoid network.

    Args:
        X: Input matrix that ``parameters["W1"]`` is multiplied with.
        parameters: dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        Tuple ``(A2, cache)``: ``A2`` is the output-layer activation and
        ``cache`` maps ``"Z1"``, ``"A1"``, ``"Z2"``, ``"A2"`` to the
        pre-activations and activations of both layers.
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    # Hidden layer: affine transform followed by sigmoid.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = sigmoid(hidden_pre)

    # Output layer: same pattern applied to the hidden activations.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return output_act, {
        "Z1": hidden_pre,
        "A1": hidden_act,
        "Z2": output_pre,
        "A2": output_act,
    }

In [41]:
def compute_cost(A2, Y):
    """Mean binary cross-entropy between predictions ``A2`` and labels ``Y``.

    Args:
        A2: Predicted probabilities, same shape as ``Y``.
        Y: Labels; ``Y.shape[1]`` is used as the number of examples.

    Returns:
        The cost summed over all entries and divided by ``Y.shape[1]``,
        passed through ``np.squeeze``.
    """
    m = Y.shape[1]
    # Keep probabilities strictly inside (0, 1): a saturated prediction of
    # exactly 0 or 1 would otherwise give log(0) and a NaN/inf cost.
    eps = np.finfo(float).eps
    probs = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)
    cost = -np.sum(Y * np.log(probs) + (1 - Y) * np.log(1 - probs)) / m
    return np.squeeze(cost)

In [42]:
def backward_propagation(X, Y, parameters, cache):
    """Compute the gradients of the cost for both layers.

    Args:
        X: Input matrix; ``X.shape[1]`` is used as the number of examples.
        Y: Labels, same shape as ``cache["A2"]``.
        parameters: dict of weights; only ``"W2"`` is read here.
        cache: dict from the forward pass; ``"A1"`` and ``"A2"`` are read.

    Returns:
        dict with keys ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``.
    """
    m = X.shape[1]
    A1 = cache["A1"]

    # Output-layer error term: prediction minus label.
    dZ2 = cache["A2"] - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # The sigmoid derivative at Z1 equals A1 * (1 - A1); reusing the cached
    # activation avoids evaluating the sigmoid again on every call.
    dZ1 = np.dot(parameters["W2"].T, dZ2) * (A1 * (1 - A1))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    return {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

In [43]:
def update_parameters(parameters, gradients, learning_rate):
    """Apply one gradient-descent step to every parameter, in place.

    Args:
        parameters: dict with ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``; its
            entries are updated with ``-=``.
        gradients: dict with ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict that was passed in.
    """
    # (parameter key, gradient key) pairs, in the order they are updated.
    pairs = (("W1", "dW1"), ("b1", "db1"), ("W2", "dW2"), ("b2", "db2"))
    for param_key, grad_key in pairs:
        parameters[param_key] -= learning_rate * gradients[grad_key]
    return parameters

In [44]:
def train_model(X, Y, hidden_size, num_iterations, learning_rate):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input matrix; ``X.shape[0]`` is taken as the input size.
        Y: Label matrix; ``Y.shape[0]`` is taken as the output size.
        hidden_size: Number of hidden units.
        num_iterations: Number of gradient-descent steps to run.
        learning_rate: Step size for each parameter update.

    Returns:
        The parameter dict after the final update.
    """
    n_in, n_out = X.shape[0], Y.shape[0]
    parameters = initialize_parameters(n_in, hidden_size, n_out)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        # The cost reflects the parameters *before* this iteration's update.
        cost = compute_cost(A2, Y)
        parameters = update_parameters(
            parameters,
            backward_propagation(X, Y, parameters, cache),
            learning_rate,
        )

        # Report progress on every 1000th iteration, starting with iteration 0.
        if not i % 1000:
            print(f"Cost after iteration {i}: {cost}")

    return parameters

In [45]:
# Example usage: train the network on the four XOR input/label pairs.
# Each column of X is one example (train_model reads X.shape[0] as the input
# size); Y holds the XOR of the two rows of X, one label per column.
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
Y = np.array([[0, 1, 1, 0]])
hidden_size = 4
num_iterations = 10000
# NOTE(review): in the recorded output of this cell the cost stays at
# ~0.6931 (= ln 2) for all 10000 iterations, i.e. the network is not learning
# XOR with these settings. A larger learning rate and/or larger initial
# weights are presumably needed -- confirm by re-running.
learning_rate = 0.01

parameters = train_model(X, Y, hidden_size, num_iterations, learning_rate)
print("Trained parameters:")
print(parameters)

Cost after iteration 0: 0.6931586006344547
Cost after iteration 1000: 0.6931471809953038
Cost after iteration 2000: 0.6931471805117335
Cost after iteration 3000: 0.6931471805116804
Cost after iteration 4000: 0.6931471805116478


Cost after iteration 5000: 0.6931471805116151
Cost after iteration 6000: 0.6931471805115825
Cost after iteration 7000: 0.6931471805115498
Cost after iteration 8000: 0.6931471805115171
Cost after iteration 9000: 0.6931471805114846
Trained parameters:
{'W1': array([[ 0.01764151,  0.00400172],
       [ 0.0097869 ,  0.02240792],
       [ 0.01867564, -0.00977291],
       [ 0.00949242, -0.00152101]]), 'b1': array([[ 2.27507695e-06],
       [-2.85942076e-06],
       [-4.26861137e-08],
       [-1.61627709e-05]]), 'W2': array([[-0.0034238 ,  0.00171248, -0.000946  ,  0.01215819]]), 'b2': array([[-0.00475903]])}
