In [2]:
import numpy as np
import pandas as pd

In [123]:
df = pd.read_csv('train.csv')

In [124]:
df = np.array(df)
np.random.shuffle(df)

In [125]:
m, n = df.shape
print(m , n)

42000 785


In [126]:
df_train = df[0:33600].T
Y_train = df_train[0]
X_train = df_train[1:n]
X_train = X_train / 255
# 80 / 20
df_test = df[33600:m].T
Y_test = df_test[0]
X_test = df_test[1:n]
X_test = X_test / 255

In [106]:
def params_config():
    n_input = 784  # Amount of pixels in an image
    n_hidden = 64
    n_output = 10  # Amount of classes

    # W - Wagi
    # b - "biasy"
    W1 = np.random.rand(n_hidden,n_input) - 0.5
    b1 = np.random.rand(n_hidden,1) - 0.5
    W2 = np.random.rand(n_output,n_hidden) - 0.5
    b2 = np.random.rand(n_output,1) - 0.5

    print("X shape:", X_train.shape)
    print("W1 shape:", W1.shape)
    print("b1 shape:", b1.shape)

    return W1, b1, W2, b2

In [59]:
def softmax(Z):
    expZ = np.exp(Z- np.max(Z, axis=0, keepdims=True))
    return expZ/np.sum(expZ,axis=0,keepdims=True)

In [80]:
def ReLU(Z):
    return np.maximum(0, Z)

In [81]:
def forward_propagation(W1, b1, W2, b2, X):
    Z1 = np.dot(W1, X) + b1
    A1 = ReLU(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

In [82]:
def label_to_matrix(Y):
    label_matrix = np.zeros((Y.size, Y.max() + 1)) # Jak mamy od 0 do 9 to Y.max()=9 więc + 1 by mieć 10 rzędów
    label_matrix[np.arange(Y.size), Y] = 1 # W każdym rzędzie idziemy do pozycji z Y i zamieniamy na 1 bombastyczna sprawa
    return label_matrix.T

In [83]:
def ReLU_derivative(Z):
    return Z > 0

In [84]:
def back_propagation(Z1, A1, Z2, A2, W2, X, Y):
    label_matrix = label_to_matrix(Y)
    m = Y.size # liczba rzędów
    dZ2 = A2 - label_matrix
    dW2 = 1/m * np.dot(dZ2,A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T,dZ2) * ReLU_derivative(Z1)
    dW1 = 1/m * np.dot(dZ1,X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

In [85]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

In [102]:
def get_accuracy(A2, Y):
    predictions = np.argmax(A2, axis=0)
    return np.mean(predictions == Y)

In [127]:
def grad_descent(X,Y,epochs,alpha): 
    W1, b1, W2, b2 = params_config()
    label_matrix = label_to_matrix(Y)
    for i in range(epochs):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = back_propagation(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 50 == 0:
            acc = get_accuracy(A2, Y)
            print(f"Epoch {i} |Accuracy: {acc:.4f}")
    return W1, b1, W2, b2

In [128]:
W1, b1, W2, b2 = grad_descent(X_train,Y_train,2000,0.05)

X shape: (784, 42000)
W1 shape: (64, 784)
b1 shape: (64, 1)
Epoch 0 |Accuracy: 0.1236
Epoch 50 |Accuracy: 0.5907
Epoch 100 |Accuracy: 0.7046
Epoch 150 |Accuracy: 0.7599
Epoch 200 |Accuracy: 0.7907
Epoch 250 |Accuracy: 0.8103
Epoch 300 |Accuracy: 0.8249
Epoch 350 |Accuracy: 0.8366
Epoch 400 |Accuracy: 0.8460
Epoch 450 |Accuracy: 0.8539
Epoch 500 |Accuracy: 0.8601
Epoch 550 |Accuracy: 0.8655
Epoch 600 |Accuracy: 0.8690
Epoch 650 |Accuracy: 0.8736
Epoch 700 |Accuracy: 0.8772
Epoch 750 |Accuracy: 0.8803
Epoch 800 |Accuracy: 0.8834
Epoch 850 |Accuracy: 0.8861
Epoch 900 |Accuracy: 0.8885
Epoch 950 |Accuracy: 0.8910
Epoch 1000 |Accuracy: 0.8931
Epoch 1050 |Accuracy: 0.8953
Epoch 1100 |Accuracy: 0.8975
Epoch 1150 |Accuracy: 0.8993
Epoch 1200 |Accuracy: 0.9007
Epoch 1250 |Accuracy: 0.9023
Epoch 1300 |Accuracy: 0.9035
Epoch 1350 |Accuracy: 0.9051
Epoch 1400 |Accuracy: 0.9064
Epoch 1450 |Accuracy: 0.9077
Epoch 1500 |Accuracy: 0.9091
Epoch 1550 |Accuracy: 0.9107
Epoch 1600 |Accuracy: 0.9119
Epoch 

In [131]:
Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X_test)
print(get_accuracy(A2, Y_test))

0.9196428571428571
