In [10]:
# Input layer weights and biases
w = [[0.15, 0.2], [0.25, 0.3]]
b = [0.35, 0.35]

# Hidden layer weights and biases
wh = [[0.4, 0.45], [0.5, 0.55]]
bh = [0.6, 0.6]

# Output layer weights and biases
wo = [[0.01, 0.99], [0.01, 0.99]]
bo = [0.01, 0.01]

In [11]:
import math

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [12]:
def forward_pass(inputs):
    # Calculate the inputs to the hidden layer
    h_input = [sum([inputs[j] * w[i][j] for j in range(len(inputs))]) + b[i] for i in range(len(b))]

    # Apply the sigmoid activation function to the hidden layer
    h_output = [sigmoid(x) for x in h_input]

    # Calculate the inputs to the output layer
    o_input = [sum([h_output[j] * wh[i][j] for j in range(len(h_output))]) + bh[i] for i in range(len(bh))]

    # Apply the sigmoid activation function to the output layer
    o_output = [sigmoid(x) for x in o_input]

    return h_output, o_output

In [13]:
def backward_pass(inputs, h_output, o_output, target):
    # Calculate the error in the output layer
    o_error = [target[i] - o_output[i] for i in range(len(target))]

    # Calculate the delta for the output layer
    o_delta = [o_error[i] * sigmoid_derivative(o_output[i]) for i in range(len(o_error))]

    # Calculate the error in the hidden layer
    h_error = [sum([o_delta[j] * wh[j][i] for j in range(len(o_delta))]) for i in range(len(h_output))]

    # Calculate the delta for the hidden layer
    h_delta = [h_error[i] * sigmoid_derivative(h_output[i]) for i in range(len(h_error))]

    return o_delta, h_delta

In [17]:
def update_weights(inputs, h_output, o_delta, h_delta, lr):
    global w, wh, b, bh, wo, bo

    # Update weights and biases in the output layer
    wo = [[wo[i][j] + lr * o_delta[i] * h_output[j] for j in range(len(wo[0]))] for i in range(len(wo))]
    bo = [bo[i] + lr * o_delta[i] for i in range(len(bo))]

    # Update weights and biases in the hidden layer
    wh = [[wh[i][j] + lr * h_delta[i] * h_output[j] for j in range(len(wh[0]))] for i in range(len(wh))]
    bh = [bh[i] + lr * h_delta[i] for i in range(len(bh))]

    # Update weights and biases in the input layer
    w = [[w[i][j] + lr * inputs[j] * h_delta[i] for j in range(len(w[0]))] for i in range(len(w))]
    b = [b[i] + lr * h_delta[i] for i in range(len(b))]


In [18]:
def train(inputs, target, lr):
    h_output, o_output = forward_pass(inputs)
    o_delta, h_delta = backward_pass(inputs, h_output, o_output, target)
    update_weights(inputs, h_output, o_delta, h_delta, lr)

In [38]:
# Training inputs and target outputs
inputs = [0.05, 0.10]
target = [0.01, 0.99]

# Learning rate
lr = 0.5

# Number of iterations (epochs)
epochs = 100

for _ in range(epochs):
    train(inputs, target, lr)

In [39]:
# ... (previous code)

# Training Loop:
for epoch in range(epochs):
    train(inputs, target, lr)
    if (epoch+1) % 1 == 0:
        h_output, o_output = forward_pass(inputs)
        print(f"Epoch {epoch+1}: Predicted Output = {o_output}, Loss = {sum([(target[i] - o_output[i])**2 for i in range(len(target))])/2}")


Epoch 1: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 2: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 3: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 4: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 5: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 6: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 7: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 8: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 9: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 10: Predicted Output = [0.5386244805683293, 0.4945947219146775], Loss = 0.26243511550546583
Epoch 11: Predicted Output = 

In [40]:
# Training inputs and target outputs
inputs = [0.05, 0.10]
target = [0.01, 0.99]

# Learning rate
lr = 0.5

# Number of iterations (epochs)
epochs = 1

for _ in range(epochs):
    train(inputs, target, lr)

h_output, o_output = forward_pass(inputs)
error = sum([(target[i] - o_output[i])**2 for i in range(len(target))]) / 2
print(f"Total Error after first round of Backpropagation: {error}")


Total Error after first round of Backpropagation: 0.2624351155054656


In [41]:
def relu(x):
    return max(0, x)

def relu_derivative(x):
    return 1 if x > 0 else 0


In [42]:
import math

def tanh(x):
    return math.tanh(x)

def tanh_derivative(x):
    return 1 - math.tanh(x)**2


In [43]:
def forward_pass(inputs):
    # Calculate the inputs to the hidden layer
    h_input = [sum([inputs[j] * w[i][j] for j in range(len(inputs))]) + b[i] for i in range(len(b))]

    # Apply the sigmoid activation function to the hidden layer
    h_output = [relu(x) for x in h_input]

    # Calculate the inputs to the output layer
    o_input = [sum([h_output[j] * wh[i][j] for j in range(len(h_output))]) + bh[i] for i in range(len(bh))]

    # Apply the sigmoid activation function to the output layer
    o_output = [relu(x) for x in o_input]

    return h_output, o_output

In [48]:
def backward_pass(inputs, h_output, o_output, target):
    # Calculate the error in the output layer
    o_error = [target[i] - o_output[i] for i in range(len(target))]

    # Calculate the delta for the output layer
    o_delta = [o_error[i] * relu_derivative(o_output[i]) for i in range(len(o_error))]

    # Calculate the error in the hidden layer
    h_error = [sum([o_delta[j] * wh[j][i] for j in range(len(o_delta))]) for i in range(len(h_output))]

    # Calculate the delta for the hidden layer
    h_delta = [h_error[i] * tanh_derivative(h_output[i]) for i in range(len(h_error))]

    return o_delta, h_delta
    
    
def update_weights(inputs, h_output, o_delta, h_delta, lr):
    global w, wh, b, bh, wo, bo

    # Update weights and biases in the output layer
    wo = [[wo[i][j] + lr * o_delta[i] * h_output[j] for j in range(len(wo[0]))] for i in range(len(wo))]
    bo = [bo[i] + lr * o_delta[i] for i in range(len(bo))]

    # Update weights and biases in the hidden layer
    wh = [[wh[i][j] + lr * h_delta[i] * h_output[j] for j in range(len(wh[0]))] for i in range(len(wh))]
    bh = [bh[i] + lr * h_delta[i] for i in range(len(bh))]

    # Update weights and biases in the input layer
    w = [[w[i][j] + lr * inputs[j] * h_delta[i] for j in range(len(w[0]))] for i in range(len(w))]
    b = [b[i] + lr * h_delta[i] for i in range(len(b))]


def train(inputs, target, lr):
    h_output, o_output = forward_pass(inputs)
    o_delta, h_delta = backward_pass(inputs, h_output, o_output, target)
    update_weights(inputs, h_output, o_delta, h_delta, lr)
    
# Training inputs and target outputs
inputs = [0.05, 0.10]
target = [0.01, 0.99]

# Learning rate
lr = 0.5

# Number of iterations (epochs)
epochs = 100

for _ in range(epochs):
    train(inputs, target, lr)
    
    
# Training Loop:
for epoch in range(epochs):
    train(inputs, target, lr)
    if (epoch+1) % 1 == 0:
        h_output, o_output = forward_pass(inputs)
        print(f"Epoch {epoch+1}: Predicted Output = {o_output}, Loss = {sum([(target[i] - o_output[i])**2 for i in range(len(target))])/2}")


Epoch 1: Predicted Output = [0.44173849016736644, 0.47942862929534336], Loss = 0.22354062423761453
Epoch 2: Predicted Output = [0.44176885387108966, 0.47924576826464294], Loss = 0.2236471142043646
Epoch 3: Predicted Output = [0.4417987361769198, 0.47906341427663757], Loss = 0.22375317159731603
Epoch 4: Predicted Output = [0.44182815976381157, 0.47888160269548413], Loss = 0.22385878781406846
Epoch 5: Predicted Output = [0.441857145440291, 0.4787003662710294], Loss = 0.22396395475960806
Epoch 6: Predicted Output = [0.44188571230583445, 0.4785197353471322], Loss = 0.22406866481164284
Epoch 7: Predicted Output = [0.4419138778984022, 0.47833973805228946], Loss = 0.22417291078881782
Epoch 8: Predicted Output = [0.4419416583293137, 0.47816040047408853], Loss = 0.22427668592156152
Epoch 9: Predicted Output = [0.44196906840655487, 0.47798174681886973], Loss = 0.22437998382534147
Epoch 10: Predicted Output = [0.44199612174750647, 0.4778037995578625], Loss = 0.22448279847612432
Epoch 11: Predicte

In [49]:
def forward_pass(inputs):
    # Calculate the inputs to the hidden layer
    h_input = [sum([inputs[j] * w[i][j] for j in range(len(inputs))]) + b[i] for i in range(len(b))]

    # Apply the sigmoid activation function to the hidden layer
    h_output = [tanh(x) for x in h_input]

    # Calculate the inputs to the output layer
    o_input = [sum([h_output[j] * wh[i][j] for j in range(len(h_output))]) + bh[i] for i in range(len(bh))]

    # Apply the sigmoid activation function to the output layer
    o_output = [tanh(x) for x in o_input]

    return h_output, o_output

In [50]:
def backward_pass(inputs, h_output, o_output, target):
    # Calculate the error in the output layer
    o_error = [target[i] - o_output[i] for i in range(len(target))]

    # Calculate the delta for the output layer
    o_delta = [o_error[i] * sigmoid_derivative(o_output[i]) for i in range(len(o_error))]

    # Calculate the error in the hidden layer
    h_error = [sum([o_delta[j] * wh[j][i] for j in range(len(o_delta))]) for i in range(len(h_output))]

    # Calculate the delta for the hidden layer
    h_delta = [h_error[i] * sigmoid_derivative(h_output[i]) for i in range(len(h_error))]

    return o_delta, h_delta
    
    
def update_weights(inputs, h_output, o_delta, h_delta, lr):
    global w, wh, b, bh, wo, bo

    # Update weights and biases in the output layer
    wo = [[wo[i][j] + lr * o_delta[i] * h_output[j] for j in range(len(wo[0]))] for i in range(len(wo))]
    bo = [bo[i] + lr * o_delta[i] for i in range(len(bo))]

    # Update weights and biases in the hidden layer
    wh = [[wh[i][j] + lr * h_delta[i] * h_output[j] for j in range(len(wh[0]))] for i in range(len(wh))]
    bh = [bh[i] + lr * h_delta[i] for i in range(len(bh))]

    # Update weights and biases in the input layer
    w = [[w[i][j] + lr * inputs[j] * h_delta[i] for j in range(len(w[0]))] for i in range(len(w))]
    b = [b[i] + lr * h_delta[i] for i in range(len(b))]


def train(inputs, target, lr):
    h_output, o_output = forward_pass(inputs)
    o_delta, h_delta = backward_pass(inputs, h_output, o_output, target)
    update_weights(inputs, h_output, o_delta, h_delta, lr)
    
# Training inputs and target outputs
inputs = [0.05, 0.10]
target = [0.01, 0.99]

# Learning rate
lr = 0.5

# Number of iterations (epochs)
epochs = 100

for _ in range(epochs):
    train(inputs, target, lr)
    
    
# Training Loop:
for epoch in range(epochs):
    train(inputs, target, lr)
    if (epoch+1) % 1 == 0:
        h_output, o_output = forward_pass(inputs)
        print(f"Epoch {epoch+1}: Predicted Output = {o_output}, Loss = {sum([(target[i] - o_output[i])**2 for i in range(len(target))])/2}")


Epoch 1: Predicted Output = [0.4459098062021061, 0.4685057329801445], Loss = 0.23098681483886702
Epoch 2: Predicted Output = [0.44592896342242705, 0.46852828881541286], Loss = 0.23098340335816658
Epoch 3: Predicted Output = [0.44594804592140264, 0.46855072582422924], Loss = 0.2309800221405638
Epoch 4: Predicted Output = [0.4459670539478235, 0.46857304442379943], Loss = 0.23097667106470474
Epoch 5: Predicted Output = [0.445985987749823, 0.46859524503019107], Loss = 0.23097335000965763
Epoch 6: Predicted Output = [0.4460048475748782, 0.4686173280583324], Loss = 0.2309700588549127
Epoch 7: Predicted Output = [0.4460236336698095, 0.4686392939220155], Loss = 0.2309667974803794
Epoch 8: Predicted Output = [0.44604234628078143, 0.468661143033896], Loss = 0.23096356576638633
Epoch 9: Predicted Output = [0.44606098565330343, 0.4686828758054944], Loss = 0.23096036359368002
Epoch 10: Predicted Output = [0.44607955203222976, 0.46870449264719666], Loss = 0.23095719084342345
Epoch 11: Predicted Outp

In [47]:
import numpy as np

def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=0, keepdims=True)

def softmax_derivative(x):
    # Derivative of softmax is calculated separately during backpropagation
    return x
