In [21]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [22]:
# Load the Iris dataset and keep the feature matrix as-is.
data = load_iris()
X = data.data
# One-hot encode the integer class labels by picking rows of a 3x3 identity matrix.
y = np.eye(3)[data.target]

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [23]:
# Layer widths at the network's edges are read off the training arrays:
# one input per feature column, one output per one-hot label column.
input_size, output_size = X_train.shape[1], y_train.shape[1]
# Width of the single hidden layer.
hidden_size = 10
# Factor applied to every gradient in backward().
learning_rate = 0.01
# Number of full passes made by the training loop.
epochs = 500

In [24]:
# Draw the initial weights from a seeded generator so that Restart & Run All
# reproduces the same starting point (the original used the unseeded global RNG,
# so every run produced different losses and accuracies).
rng = np.random.default_rng(42)

# Input -> hidden layer: standard-normal weights, zero biases kept as a (1, hidden_size) row.
W1 = rng.standard_normal((input_size, hidden_size))
b1 = np.zeros((1, hidden_size))
# Hidden -> output layer: standard-normal weights, zero biases kept as a (1, output_size) row.
W2 = rng.standard_normal((hidden_size, output_size))
b2 = np.zeros((1, output_size))

In [25]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Computed as exp(-log(1 + exp(-x))) via np.logaddexp so that large negative
    inputs do not overflow inside exp (the naive form raises an overflow
    RuntimeWarning for such values even though its final result is still 0).

    Parameters
    ----------
    x : scalar or numpy array
        Input value(s); must support unary negation.

    Returns
    -------
    numpy float or array of the same shape as ``x`` with values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), evaluated stably.
    return np.exp(-np.logaddexp(0, -x))

In [26]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating, which leaves the
    result unchanged mathematically but keeps exp() from overflowing.
    Normalisation is along axis 1, so every row of the result sums to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalised = np.exp(x - row_max)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

In [27]:
def sigmoid_derivative(x):
    """Derivative of the sigmoid expressed in terms of its *output*.

    ``x`` is expected to already be a sigmoid activation (backward() passes
    the hidden activations ``a1``), not the pre-activation value; for such an
    input s the derivative is s * (1 - s).
    """
    complement = 1 - x
    return complement * x

In [28]:
def forward(X):
    """Run the two-layer network on X and return the softmax output.

    The pre-activations and activations of both layers are also stored in the
    module-level names z1, a1, z2 and a2, which backward() reads afterwards.
    Reads the current global parameters W1, b1, W2 and b2.
    """
    global z1, a1, z2, a2
    hidden_pre = np.dot(X, W1) + b1          # affine map into the hidden layer
    hidden_act = sigmoid(hidden_pre)         # hidden-layer non-linearity
    scores = np.dot(hidden_act, W2) + b2     # affine map into the output layer
    probabilities = softmax(scores)          # rows normalised to sum to 1
    # Publish the intermediates for the backward pass.
    z1, a1, z2, a2 = hidden_pre, hidden_act, scores, probabilities
    return probabilities

In [29]:
def backward(X, y, output):
    """Apply one gradient-descent update to the global parameters W1, b1, W2, b2.

    Parameters
    ----------
    X : array
        Inputs that produced ``output``. The hidden activations are read from
        the global ``a1``, so forward(X) must have been the most recent
        forward() call.
    y : array
        Targets with the same shape as ``output`` (assumed one-hot — confirm
        against the caller).
    output : array
        Network output for ``X`` as returned by forward().

    Returns
    -------
    None. The parameter arrays are modified in place.

    NOTE(review): the gradients below are summed over the rows of X rather
    than averaged, so the effective step size grows with the number of rows.
    Consider dividing by X.shape[0] and retuning learning_rate if the loss
    oscillates.
    """
    global W1, b1, W2, b2
    # With softmax outputs and a cross-entropy loss, the gradient with respect
    # to the output-layer scores is simply (output - targets).
    error = output - y
    dW2 = np.dot(a1.T, error)
    db2 = np.sum(error, axis=0, keepdims=True)

    # Push the error back through W2 (still the pre-update values) and the
    # sigmoid. Computed once and reused for both hidden-layer gradients.
    hidden_delta = np.dot(error, W2.T) * sigmoid_derivative(a1)
    dW1 = np.dot(X.T, hidden_delta)
    # keepdims=True keeps db1 as a (1, hidden) row, matching b1 and db2.
    db1 = np.sum(hidden_delta, axis=0, keepdims=True)

    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

In [30]:
for epoch in range(epochs):
    # One full-batch step: the forward pass output drives the parameter update.
    output = forward(X_train)
    backward(X_train, y_train, output)
    if epoch % 50 != 0:
        continue
    # Every 50th epoch, report the mean negative log of the probability given to
    # the class marked in y_train. `output` was computed before this epoch's update.
    loss = -np.log(output[np.arange(len(y_train)), np.argmax(y_train, axis=1)]).mean()
    print(f'Epoch {epoch}, Loss: {loss}')


Epoch 0, Loss: 1.5726181870291935
Epoch 50, Loss: 0.5061102801789394
Epoch 100, Loss: 0.4720298850878603
Epoch 150, Loss: 0.31971184262047697
Epoch 200, Loss: 0.43608732684920365
Epoch 250, Loss: 0.26783820479918974
Epoch 300, Loss: 0.47715729713757987
Epoch 350, Loss: 0.23870507639858782
Epoch 400, Loss: 0.3872259339528161
Epoch 450, Loss: 0.15177051060916666


In [31]:
def predict(X):
    """Return, for each row of X, the index of the largest network output.

    Delegates to forward(X), so the module-level intermediates that forward()
    stores are overwritten as a side effect.
    """
    return np.argmax(forward(X), axis=1)

In [32]:
# Fraction of training rows whose predicted class equals the class marked in y_train.
train_predictions = predict(X_train)
train_accuracy = (train_predictions == y_train.argmax(axis=1)).mean()

In [33]:
# Fraction of test rows whose predicted class equals the class marked in y_test.
test_predictions = predict(X_test)
test_accuracy = (test_predictions == y_test.argmax(axis=1)).mean()

In [34]:
# Report both accuracies as percentages with two decimals, one per line.
print(
    f'Training accuracy: {train_accuracy * 100:.2f}%',
    f'Testing accuracy: {test_accuracy * 100:.2f}%',
    sep='\n',
)

Training accuracy: 67.50%
Testing accuracy: 63.33%
