In [51]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


In [52]:
# Feature tables stay as DataFrames; label tables are converted to NumPy arrays.
# NOTE(review): read_csv treats the first row as a header by default - confirm
# that all four CSV files really start with a header row.
x_train = pd.read_csv('train_X.csv')
x_test = pd.read_csv('test_X.csv')

y_train = pd.read_csv('train_label.csv').to_numpy()
y_test = pd.read_csv('test_label.csv').to_numpy()


In [53]:
# Show the training label array loaded from train_label.csv (NumPy abbreviates
# the repr of large arrays with "...").
# NOTE(review): the visible rows look one-hot encoded, which is what accuracy()
# relies on when it takes argmax over axis 1 - confirm for the full array.
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [54]:
# Divide every feature by 255.0 (presumably 8-bit pixel intensities - TODO confirm).
# Gotcha: the names are rebound to their own scaled values, so running this
# cell twice divides by 255 twice. Re-run the loading cell first if in doubt.
x_train, x_test = x_train / 255.0, x_test / 255.0


In [55]:
def initialize_parameters(layer_dims, seed=1, scale=0.01):
    """Create the weight matrices and bias vectors for a dense network.

    Parameters
    ----------
    layer_dims : sequence of int
        Layer widths, input layer first. Layer ``l`` receives a weight matrix
        of shape ``(layer_dims[l], layer_dims[l-1])`` and a bias column of
        shape ``(layer_dims[l], 1)``.
    seed : int, optional
        Seed of the private random generator used for the weights. The
        default (1) reproduces the values obtained from
        ``np.random.seed(1)`` followed by ``np.random.randn``.
    scale : float, optional
        Factor applied to the standard-normal draws (default 0.01).

    Returns
    -------
    dict
        Keys ``'W1', 'b1', ..., 'WL', 'bL'``; weights are scaled normal
        draws, biases are zeros.
    """
    # A dedicated RandomState keeps initialisation reproducible without
    # reseeding NumPy's global generator as a side effect of this call.
    rng = np.random.RandomState(seed)
    parameters = {}
    for l in range(1, len(layer_dims)):
        fan_out, fan_in = layer_dims[l], layer_dims[l - 1]
        parameters['W' + str(l)] = rng.randn(fan_out, fan_in) * scale
        parameters['b' + str(l)] = np.zeros((fan_out, 1))
    return parameters

In [56]:
    
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Softmax along axis 0, so each column of the result sums to 1.

    The maximum along axis 0 is subtracted before exponentiating, which
    keeps ``np.exp`` from overflowing on large inputs without changing the
    result.
    """
    column_peak = np.max(Z, axis=0, keepdims=True)
    exps = np.exp(Z - column_peak)
    normaliser = exps.sum(axis=0, keepdims=True)
    return exps / normaliser


In [57]:
def forward_propagation(X, parameters):
    """Run the network forward: ReLU on hidden layers, softmax on the last.

    Parameters
    ----------
    X : array-like with a ``.T`` attribute
        Input batch; it is transposed before the first layer, so examples
        end up along axis 1 inside the network.
    parameters : dict
        ``'W1', 'b1', ..., 'WL', 'bL'``; the layer count is taken as
        ``len(parameters) // 2``.

    Returns
    -------
    (AL, caches)
        ``AL`` is the softmax output of the last layer. ``caches`` maps
        ``'A<l>'`` / ``'Z<l>'`` to the activation / pre-activation of every
        layer ``l`` (the input itself is not cached).
    """
    caches = {}
    activation = X.T
    n_layers = len(parameters) // 2

    def affine(idx, inputs):
        # Linear part of layer `idx`: W.dot(inputs) + b.
        return parameters['W' + str(idx)].dot(inputs) + parameters['b' + str(idx)]

    # Hidden layers 1 .. n_layers-1 use ReLU.
    for idx in range(1, n_layers):
        pre_activation = affine(idx, activation)
        activation = relu(pre_activation)
        caches['A' + str(idx)], caches['Z' + str(idx)] = activation, pre_activation

    # Output layer uses softmax.
    logits = affine(n_layers, activation)
    probabilities = softmax(logits)
    caches['A' + str(n_layers)], caches['Z' + str(n_layers)] = probabilities, logits

    return probabilities, caches

In [58]:
def cost_function(AL, Y):
    """Cross-entropy cost averaged over the examples.

    ``Y`` holds one example per row (``m = Y.shape[0]``) and is transposed to
    line up with ``AL``. A small constant (1e-8) is added inside the log so a
    predicted probability of exactly 0 does not produce ``-inf``.

    Returns the cost passed through ``np.squeeze``.
    """
    n_examples = Y.shape[0]
    log_probs = np.log(AL + 1e-8)
    summed = np.sum(Y.T * log_probs)
    cost = -summed / n_examples
    return np.squeeze(cost)

def relu_derivative(Z):
    """Gradient mask of ReLU: True where ``Z`` is strictly positive, else False."""
    is_positive = Z > 0
    return is_positive

In [59]:
def backward_propagation(X, Y, caches, parameters):
    """Compute gradients of the softmax cross-entropy cost for every layer.

    Parameters
    ----------
    X : array-like with ``.shape`` and ``.T``
        Input batch, one example per row (``m = X.shape[0]``).
    Y : ndarray
        Targets, one example per row; transposed internally to line up with
        the cached output activation.
    caches : dict
        ``'A<l>'`` / ``'Z<l>'`` entries as produced by the forward pass.
    parameters : dict
        ``'W1', 'b1', ..., 'WL', 'bL'``.

    Returns
    -------
    dict
        ``'dW<l>'`` and ``'db<l>'`` for every layer ``l`` in ``1..L``.

    Notes
    -----
    The input to layer 1 is ``X.T`` itself (the forward pass stores no
    ``'A0'`` entry), so a network with a single layer is handled without
    looking up a missing cache key.
    """
    grads = {}
    L = len(parameters) // 2
    m = X.shape[0]
    inputs = X.T

    # Output layer: softmax combined with cross-entropy gives dZ = A_L - Y.
    dZ = caches['A' + str(L)] - Y.T

    for l in range(L, 0, -1):
        # Activation feeding layer l; for the first layer that is the input.
        A_prev = inputs if l == 1 else caches['A' + str(l - 1)]
        grads['dW' + str(l)] = dZ.dot(A_prev.T) / m
        grads['db' + str(l)] = np.sum(dZ, axis=1, keepdims=True) / m

        if l > 1:
            # Push the error back through W_l, then through the ReLU of layer l-1.
            dA_prev = parameters['W' + str(l)].T.dot(dZ)
            dZ = dA_prev * relu_derivative(caches['Z' + str(l - 1)])

    return grads

In [60]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Each ``parameters['W<l>']`` / ``parameters['b<l>']`` is decreased in place
    by ``learning_rate`` times the matching ``grads['dW<l>']`` /
    ``grads['db<l>']``. The same ``parameters`` dict is returned.
    """
    n_layers = len(parameters) // 2
    for idx in range(1, n_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(idx)
            parameters[key] -= learning_rate * grads['d' + key]
    return parameters

In [61]:
def train_model(X, Y, layer_dims, learning_rate=0.01, num_epochs=100):
    """Train the network with full-batch gradient descent.

    Parameters are created by ``initialize_parameters(layer_dims)``. Every
    epoch runs one forward pass over all of ``X``, evaluates the cost, computes
    the gradients and applies a single update. The cost (measured before that
    epoch's update) is printed every 10th epoch, starting with epoch 0.

    Returns the trained parameter dict.
    """
    parameters = initialize_parameters(layer_dims)

    for epoch in range(num_epochs):
        # Forward pass and loss on the whole batch.
        AL, caches = forward_propagation(X, parameters)
        cost = cost_function(AL, Y)

        # Gradients, then one descent step.
        parameters = update_parameters(
            parameters,
            backward_propagation(X, Y, caches, parameters),
            learning_rate,
        )

        if not epoch % 10:
            print(f"Cost after epoch {epoch}: {cost:.4f}")

    return parameters


In [62]:
def predict(X, parameters):
    """Return the predicted class index for each example in ``X``.

    Runs the forward pass and picks, for every column of the output
    activation, the row index with the largest value.
    """
    output_activation = forward_propagation(X, parameters)[0]
    return np.argmax(output_activation, axis=0)

def accuracy(predictions, Y):
    """Fraction of predictions that match the labels.

    The true class of each example is taken as the argmax along axis 1 of
    ``Y`` (one row per example); the result is the mean of the element-wise
    equality with ``predictions``.
    """
    hits = predictions == np.argmax(Y, axis=1)
    return np.mean(hits)

# x_test_first=x_test.iloc[0,:]




# Layer widths handed to train_model: input, one hidden layer, output.
layer_dims = [784, 256, 10]

# Full-batch training on the training split.
parameters = train_model(
    x_train,
    y_train,
    layer_dims,
    learning_rate=0.1,
    num_epochs=1000,
)

# Predicted class indices for both splits.
train_preds, test_preds = (predict(split, parameters) for split in (x_train, x_test))

print(f"Train accuracy: {accuracy(train_preds, y_train) * 100:.2f}%")
print(f"Test accuracy: {accuracy(test_preds, y_test) * 100:.2f}%")

Cost after epoch 0: 2.3034
Cost after epoch 10: 2.2745
Cost after epoch 20: 2.2233
Cost after epoch 30: 2.1222
Cost after epoch 40: 1.9478
Cost after epoch 50: 1.7055
Cost after epoch 60: 1.4438
Cost after epoch 70: 1.2163
Cost after epoch 80: 1.0402
Cost after epoch 90: 0.9072
Cost after epoch 100: 0.8056
Cost after epoch 110: 0.7261
Cost after epoch 120: 0.6622
Cost after epoch 130: 0.6098
Cost after epoch 140: 0.5660
Cost after epoch 150: 0.5288
Cost after epoch 160: 0.4968
Cost after epoch 170: 0.4688
Cost after epoch 180: 0.4442
Cost after epoch 190: 0.4222
Cost after epoch 200: 0.4025
Cost after epoch 210: 0.3846
Cost after epoch 220: 0.3683
Cost after epoch 230: 0.3532
Cost after epoch 240: 0.3393
Cost after epoch 250: 0.3264
Cost after epoch 260: 0.3144
Cost after epoch 270: 0.3031
Cost after epoch 280: 0.2924
Cost after epoch 290: 0.2824
Cost after epoch 300: 0.2729
Cost after epoch 310: 0.2639
Cost after epoch 320: 0.2553
Cost after epoch 330: 0.2472
Cost after epoch 340: 0.2