In [9]:
import numpy as np

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

def linear_activation(x):
    return np.sum(x, axis=1, keepdims=True)  # Sum along axis 1 to get a column vector

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def initialize_parameters(input_size, hidden_size1, hidden_size2, output_size):
    np.random.seed(42)
    params = {
        'W1': np.random.randn(input_size, hidden_size1),
        'b1': np.zeros((1, hidden_size1)),
        'W2': np.random.randn(hidden_size1, hidden_size2),
        'b2': np.zeros((1, hidden_size2)),
        'W3': np.random.randn(hidden_size2, output_size),
        'b3': np.zeros((1, output_size))
    }
    return params

def forward_propagation(X, params):
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = softmax(Z1)

    Z2 = np.dot(A1, params['W2']) + params['b2']
    A2 = softmax(Z2)

    Z3 = np.dot(A2, params['W3']) + params['b3']
    # print(Z3) Debug line
    A3 = linear_activation(Z3)

    return {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2, 'Z3': Z3, 'A3': A3}

def compute_loss(Y, A3):
    m = Y.shape[0]
    loss = -np.sum(Y * np.log(A3)) / m
    return loss

def backward_propagation(X, Y, params, cache):
    m = X.shape[0]

    dZ3 = cache['A3'] - Y
    dW3 = np.dot(cache['A2'].T, dZ3) / m
    db3 = np.sum(dZ3, axis=0, keepdims=True) / m

    dZ2 = np.dot(dZ3, params['W3'].T) * (cache['A2'] * (1 - cache['A2']))
    dW2 = np.dot(cache['A1'].T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dZ1 = np.dot(dZ2, params['W2'].T) * (cache['A1'] * (1 - cache['A1']))
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    gradients = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2, 'dW3': dW3, 'db3': db3}

    return gradients

def update_parameters(params, gradients, learning_rate):
    params['W1'] -= learning_rate * gradients['dW1']
    params['b1'] -= learning_rate * gradients['db1']
    params['W2'] -= learning_rate * gradients['dW2']
    params['b2'] -= learning_rate * gradients['db2']
    params['W3'] -= learning_rate * gradients['dW3']
    params['b3'] -= learning_rate * gradients['db3']

    return params

def train_neural_network(X, Y, input_size, hidden_size1, hidden_size2, output_size, learning_rate, epochs):
    params = initialize_parameters(input_size, hidden_size1, hidden_size2, output_size)
    prev_loss = float('inf')
    for epoch in range(epochs):
        # Forward propagation
        cache = forward_propagation(X, params)

        # Compute loss
        loss = compute_loss(Y, cache['A3'])
        if loss > prev_loss:
            print(f'Stopping training at epoch {epoch}. Loss is not decreasing.')
            print(f'Current Loss: {loss}')
            print(f'Prev Loss: {prev_loss}')
            break

        # Backward propagation
        gradients = backward_propagation(X, Y, params, cache)

        # Update parameters
        params = update_parameters(params, gradients, learning_rate)

        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss}')
        
        prev_loss = loss

    return params

# Sample data
X = np.array([[0, 0.5, 1]])
Y = np.array([[0, 1, 0]])

# Hyperparameters
input_size = X.shape[1]
hidden_size1 = 4
hidden_size2 = 3
output_size = Y.shape[1]
learning_rate = 0.01
epochs = 10000

# Train the neural network
trained_params = train_neural_network(X, Y, input_size, hidden_size1, hidden_size2, output_size, learning_rate, epochs)



Epoch 0, Loss: 2.5408254939031316
Stopping training at epoch 99. Loss is not decreasing.
Current Loss: 0.9074575623817628
Prev Loss: 0.90743838557969


In [12]:
import plotly.graph_objects as go

# ... (previous functions remain the same)

def train_neural_network(X, Y, input_size, hidden_size1, hidden_size2, output_size, learning_rate, epochs):
    params = initialize_parameters(input_size, hidden_size1, hidden_size2, output_size)
    prev_loss = float('inf')  # Initialize with a large value

    # Lists to store loss values and epoch numbers for plotting
    loss_values = []
    epoch_numbers = []

    for epoch in range(epochs):
        # Forward propagation
        cache = forward_propagation(X, params)

        # Compute loss
        loss = compute_loss(Y, cache['A3'])

        # Check if the loss is not decreasing
        #if loss >= prev_loss:
        #    print(f'Stopping training at epoch {epoch}. Loss is not decreasing.')
        #    break

        # Backward propagation
        gradients = backward_propagation(X, Y, params, cache)

        # Update parameters
        params = update_parameters(params, gradients, learning_rate)

        # Print the loss for every 100 epochs
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss}')

        # Update previous loss
        prev_loss = loss

        # Append values for plotting
        loss_values.append(loss)
        epoch_numbers.append(epoch)

    # Plot the loss curve
    plot_loss_curve(epoch_numbers, loss_values)

    return params

def plot_loss_curve(epoch_numbers, loss_values):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=epoch_numbers, y=loss_values, mode='lines', name='Loss'))
    fig.update_layout(title='Loss Curve During Training', xaxis_title='Epoch', yaxis_title='Loss')
    fig.show()

# Sample data
X = np.array([[0, 0.5, 1]])
Y = np.array([[0, 1, 0]])

# Hyperparameters
input_size = X.shape[1]
hidden_size1 = 4
hidden_size2 = 3
output_size = Y.shape[1]
learning_rate = 0.01
epochs = 1000

# Train the neural network
trained_params = train_neural_network(X, Y, input_size, hidden_size1, hidden_size2, output_size, learning_rate, epochs)


Epoch 0, Loss: 2.5408254939031316
Epoch 100, Loss: 0.9074967342132755
Epoch 200, Loss: 0.9561889116742606
Epoch 300, Loss: 1.0173440114415433
Epoch 400, Loss: 1.054906379800682
Epoch 500, Loss: 1.0740090385412326
Epoch 600, Loss: 1.0837720552518564
Epoch 700, Loss: 1.0890721314961462
Epoch 800, Loss: 1.0921458034760145
Epoch 900, Loss: 1.0940363269149544


In [18]:
-1*np.log(0.999)

0.0010005003335835344

In [19]:
-1*np.log(0.0000001)

16.11809565095832