In [1]:
import numpy as np
import pandas as pd

np.random.seed(42)

In [2]:
def load_data(path: str):
    df = pd.read_csv(path)
    return df.to_numpy()

In [3]:
data = load_data("./dataset/iris.csv")
data

array([[5.1, 3.5, 1.4, 0.2, 'Setosa'],
       [4.9, 3.0, 1.4, 0.2, 'Setosa'],
       [4.7, 3.2, 1.3, 0.2, 'Setosa'],
       [4.6, 3.1, 1.5, 0.2, 'Setosa'],
       [5.0, 3.6, 1.4, 0.2, 'Setosa'],
       [5.4, 3.9, 1.7, 0.4, 'Setosa'],
       [4.6, 3.4, 1.4, 0.3, 'Setosa'],
       [5.0, 3.4, 1.5, 0.2, 'Setosa'],
       [4.4, 2.9, 1.4, 0.2, 'Setosa'],
       [4.9, 3.1, 1.5, 0.1, 'Setosa'],
       [5.4, 3.7, 1.5, 0.2, 'Setosa'],
       [4.8, 3.4, 1.6, 0.2, 'Setosa'],
       [4.8, 3.0, 1.4, 0.1, 'Setosa'],
       [4.3, 3.0, 1.1, 0.1, 'Setosa'],
       [5.8, 4.0, 1.2, 0.2, 'Setosa'],
       [5.7, 4.4, 1.5, 0.4, 'Setosa'],
       [5.4, 3.9, 1.3, 0.4, 'Setosa'],
       [5.1, 3.5, 1.4, 0.3, 'Setosa'],
       [5.7, 3.8, 1.7, 0.3, 'Setosa'],
       [5.1, 3.8, 1.5, 0.3, 'Setosa'],
       [5.4, 3.4, 1.7, 0.2, 'Setosa'],
       [5.1, 3.7, 1.5, 0.4, 'Setosa'],
       [4.6, 3.6, 1.0, 0.2, 'Setosa'],
       [5.1, 3.3, 1.7, 0.5, 'Setosa'],
       [4.8, 3.4, 1.9, 0.2, 'Setosa'],
       [5.0, 3.0, 1.6, 0.

In [4]:
def train_test_split(data, train_split_ratio):
    np.random.shuffle(data)
    train_count = int(data.shape[0] * train_split_ratio)
    train = data[:train_count]
    test = data[train_count:]
    X_train = train[:, :4].astype(float)
    y_train = train[:, 4]
    X_test = test[:, :4].astype(float)
    y_test = test[:, 4]
    return X_train, y_train, X_test, y_test

In [5]:
def binary_cross_entropy(y_true, y_pred):
    m = y_true.shape[0]
    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
    # Calculating loss
    loss = -1/m * (np.dot(y_true.T, np.log(y_pred)) + np.dot((1 - y_true).T, np.log(1 - y_pred)))

    return loss

In [6]:
X_train, y_train, X_test, y_test = train_test_split(data, train_split_ratio=0.9)
X_train, y_train, X_test, y_test

(array([[6.1, 2.8, 4.7, 1.2],
        [5.7, 3.8, 1.7, 0.3],
        [7.7, 2.6, 6.9, 2.3],
        [6. , 2.9, 4.5, 1.5],
        [6.8, 2.8, 4.8, 1.4],
        [5.4, 3.4, 1.5, 0.4],
        [5.6, 2.9, 3.6, 1.3],
        [6.9, 3.1, 5.1, 2.3],
        [6.2, 2.2, 4.5, 1.5],
        [5.8, 2.7, 3.9, 1.2],
        [6.5, 3.2, 5.1, 2. ],
        [4.8, 3. , 1.4, 0.1],
        [5.5, 3.5, 1.3, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.1, 3.8, 1.5, 0.3],
        [6.3, 3.3, 4.7, 1.6],
        [6.5, 3. , 5.8, 2.2],
        [5.6, 2.5, 3.9, 1.1],
        [5.7, 2.8, 4.5, 1.3],
        [6.4, 2.8, 5.6, 2.2],
        [4.7, 3.2, 1.6, 0.2],
        [6.1, 3. , 4.9, 1.8],
        [5. , 3.4, 1.6, 0.4],
        [6.4, 2.8, 5.6, 2.1],
        [7.9, 3.8, 6.4, 2. ],
        [6.7, 3. , 5.2, 2.3],
        [6.7, 2.5, 5.8, 1.8],
        [6.8, 3.2, 5.9, 2.3],
        [4.8, 3. , 1.4, 0.3],
        [4.8, 3.1, 1.6, 0.2],
        [4.6, 3.6, 1. , 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [6.7, 3.1, 4.4, 1.4],
        [4

In [7]:
y_train

array(['Versicolor', 'Setosa', 'Virginica', 'Versicolor', 'Versicolor',
       'Setosa', 'Versicolor', 'Virginica', 'Versicolor', 'Versicolor',
       'Virginica', 'Setosa', 'Setosa', 'Setosa', 'Setosa', 'Versicolor',
       'Virginica', 'Versicolor', 'Versicolor', 'Virginica', 'Setosa',
       'Virginica', 'Setosa', 'Virginica', 'Virginica', 'Virginica',
       'Virginica', 'Virginica', 'Setosa', 'Setosa', 'Setosa', 'Setosa',
       'Versicolor', 'Setosa', 'Setosa', 'Virginica', 'Versicolor',
       'Setosa', 'Setosa', 'Setosa', 'Virginica', 'Versicolor',
       'Versicolor', 'Setosa', 'Setosa', 'Versicolor', 'Virginica',
       'Virginica', 'Versicolor', 'Virginica', 'Versicolor', 'Virginica',
       'Versicolor', 'Setosa', 'Virginica', 'Versicolor', 'Setosa',
       'Setosa', 'Setosa', 'Versicolor', 'Virginica', 'Setosa', 'Setosa',
       'Setosa', 'Versicolor', 'Setosa', 'Versicolor', 'Virginica',
       'Setosa', 'Versicolor', 'Virginica', 'Setosa', 'Virginica',
       'Virginica'

In [8]:
y_train_true = np.array((y_train=="Setosa", y_train=="Virginica", y_train=="Versicolor")).T.astype(int)
y_train_true

array([[0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [0,

In [9]:
y_test_true = np.array((y_test=="Setosa", y_test=="Virginica", y_test=="Versicolor")).T.astype(int)
y_test_true

array([[0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0]])

In [10]:
# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))

# def ReLU(x):
#     return x * (x > 0)

# def tanh(x):
#     return np.tanh(x)

# def sigmoid_derivative(x):
#     return x * (1 - x)

# activation = sigmoid

# input_layer_neurons = 4
# hidden_layer_neurons = 5
# output_layer_neurons = 3

# weights_input_hidden = np.random.uniform(-1, 1, (input_layer_neurons, hidden_layer_neurons))
# bias_hidden = np.random.uniform(-1, 1, (1, hidden_layer_neurons))

# weights_hidden_output = np.random.uniform(-1, 1, (hidden_layer_neurons, output_layer_neurons))
# bias_output = np.random.uniform(-1, 1, (1, output_layer_neurons))

# epochs = 20
# learning_rate = 0.1

# for epoch in range(epochs):
#     #propagacja do przodu
#     hidden_layer_input = np.dot(X_train, weights_input_hidden) + bias_hidden
#     hidden_layer_output = activation(hidden_layer_input)

#     output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
#     predicted_output = activation(output_layer_input)

#     error = y_train_true - predicted_output
#     print(error)

In [11]:
import math

def ReLU(x):
    return x * (x > 0)

def tanh(x):
    return np.tanh(x)

def softmax(z):
    """
    Zwraca softmax wzdłuż ostatniego wymiaru (dla każdej próbki osobno).
    Stabilna wersja: odejmujemy max(z), by uniknąć overflow.
    """
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def softmax_cross_entropy(y_true, y_pred_prob):
    """
    Zakładamy:
      y_true: one-hot wektor (np. [0,0,1])
      y_pred_prob: prawdopodobieństwa z softmax (np. [0.1, 0.2, 0.7])
    Zwraca średni cross-entropy (loss) w całej mini-paczkce (tu: w całym zbiorze).
    """
    # Dodajemy mały epsilon, by uniknąć log(0)
    eps = 1e-9
    return -np.mean(np.sum(y_true * np.log(y_pred_prob + eps), axis=1))

def softmax_derivative(y_true, y_pred_prob):
    """
    Pochodna straty cross-entropy względem wyjścia softmax:
    dL/dz = (y_pred_prob - y_true)
    (to klasyczny wynik dla softmax + CE)
    """
    return (y_pred_prob - y_true) / y_true.shape[0]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(sig_x):
    return sig_x * (1 - sig_x)


def tanh_derivative(x):
    """
    Compute the derivative of the hyperbolic tangent function.
    
    Parameters:
    x (float or np.array): Input value or array of values.
    
    Returns:
    float or np.array: The derivative of tanh at the input value(s).
    """
    tanh_x = np.tanh(x)
    return 1 - tanh_x**2


def xavier(n_in, n_out):
    return math.sqrt(6/(n_in+n_out))


input_layer_neurons = 4
hidden_layer_neurons = 5
output_layer_neurons = 3

a = xavier(input_layer_neurons, output_layer_neurons)

W1 = np.random.uniform(-a, a, (input_layer_neurons, hidden_layer_neurons))
b1 = np.random.uniform(-a, a, (1, hidden_layer_neurons))

W2 = np.random.uniform(-a, a, (hidden_layer_neurons, output_layer_neurons))
b2 = np.random.uniform(-a, a, (1, output_layer_neurons))

epochs = 25
learning_rate = 0.01

for epoch in range(epochs):
    # 1. Forward pass (warstwa ukryta -> warstwa wyjściowa)
    z1 = np.dot(X_train, W1) + b1
    a1 = sigmoid(z1) #tanh(z1)

    z2 = np.dot(a1, W2) + b2
    y_pred_prob = softmax(z2)

    loss = softmax_cross_entropy(y_train_true, y_pred_prob)
    print(f"epoch {epoch+1}/{epochs}, loss: {loss}")

    d_z2 = softmax_derivative(y_train_true, y_pred_prob)

    dW2 = np.dot(a1.T, d_z2)
    db2 = np.sum(d_z2, axis=0, keepdims=True)

    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1) #tanh_derivative(a1)

    dW1 = np.dot(X_train.T, d_z1)
    db1 = np.sum(d_z1, axis=0, keepdims=True)

    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1


epoch 1/25, loss: 1.5776981186448178
epoch 2/25, loss: 1.5617413182169937
epoch 3/25, loss: 1.5462205179235968
epoch 4/25, loss: 1.5311368544347703
epoch 5/25, loss: 1.5164902739399986
epoch 6/25, loss: 1.5022795895354473
epoch 7/25, loss: 1.4885025511408572
epoch 8/25, loss: 1.4751559226011353
epoch 9/25, loss: 1.4622355613059232
epoch 10/25, loss: 1.4497364965938349
epoch 11/25, loss: 1.4376530042650792
epoch 12/25, loss: 1.4259786755870822
epoch 13/25, loss: 1.4147064801448248
epoch 14/25, loss: 1.4038288226909788
epoch 15/25, loss: 1.3933375947496607
epoch 16/25, loss: 1.383224222107932
epoch 17/25, loss: 1.3734797095001356
epoch 18/25, loss: 1.3640946837784294
epoch 19/25, loss: 1.3550594367065274
epoch 20/25, loss: 1.3463639682566226
epoch 21/25, loss: 1.33799803097611
epoch 22/25, loss: 1.3299511756620037
epoch 23/25, loss: 1.3222127982709282
epoch 24/25, loss: 1.3147721877270229
epoch 25/25, loss: 1.3076185740855897


In [12]:
z1 = np.dot(X_test, W1) + b1
a1 = tanh(z1)

z2 = np.dot(a1, W2) + b2
y_pred_prob = softmax(z2)

_max = y_pred_prob.max(axis=1, keepdims=True)
y_pred = (y_pred_prob==_max).astype(int)

((y_test_true==y_pred).sum(axis=1, keepdims=True)==3).sum() / y_pred.shape[0]

np.float64(0.8666666666666667)