In this notebook I will attempt to predict the MNIST dataset labels using a logistic regression algorithm which I will build from scratch in this notebook (no sklearn).

<hr>

### Libraries:

In [4]:
import numpy as np
import pandas as pd

np.set_printoptions(suppress=True)

from sklearn.model_selection import train_test_split

### Import data:

In [5]:
train_test_data = pd.read_csv("./data/train.csv").dropna(axis=0)
kaggle_submission_data = pd.read_csv("./data/test.csv")

### Separate labels from features:

In [6]:
X = train_test_data.drop("label", axis=1)
y = train_test_data["label"]

### Standartize data:

In [7]:
X = X / 255.

### Split into train and test:

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y.values, test_size=0.3)

### Reshape matrices:

In [9]:
X_train = X_train.T
y_train = y_train.reshape(y_train.shape[0], -1).T
X_test = X_test.T
y_test = y_test.reshape(y_test.shape[0], -1).T

In [10]:
y_train.shape

(1, 29400)

### Activation function:

In [11]:
def sigmoid(z):
    return 1 / (1 + (np.exp(-z)))

$$ \sigma (z) = \frac{1}{1 + e^{(w^TX+b)}} \ $$

### Parameter initialization:

In [12]:
def initialize_parameters(X):
    m = X.shape[0]
    w = np.zeros((m, 1))
    b = 0
    
    return m, w, b

### Forward and back propagation functions:

$$ z = w^TX + b $$   
$$ A = \sigma (z) $$  
$$ J = -\frac{1}{m}\sum_{i=1}^{m}y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)}) $$

In [13]:
def forward_propagation(X, y, w, b):
    m = y.shape[1]
    z = np.dot(w.T, X) + b
    A = sigmoid(z)
    cost = -1/m * np.sum(np.dot(np.log(A), y.T) + np.dot(np.log(1 - A), (1 - y.T)))
    cost = np.squeeze(cost)
    
    return A, cost

In [14]:
def backward_propagation(X, y, A):
    m = y.shape[1]
    dz = A - y
    dw = 1/m * np.dot(X, dz.T)
    db = 1/m * np.sum(dz)
    
    return dw, db

In [15]:
def optimize(w, b, dw, db, alpha):
    w = w - alpha * dw
    b = b - alpha * db
    
    return w, b

In [16]:
def map_np_array(x, i):
    if i == 0:
        if x == i:
            return 1
        else:
            return i

    if x != i:
        return 0
    else:
        return 1

### Train:

In [17]:
def train(X_train, y_train, epochs, alpha):
    unique_labels = np.unique(y_train)
    weights = {i:None for i in unique_labels}
    
    for i in unique_labels:            
        m, w, b = initialize_parameters(X_train)
        
        map_function = np.vectorize(map_np_array)
        y_train_labeled = map_function(y_train, i)
        
        for j in range(epochs + 1):
            A, _ = forward_propagation(X_train, y_train_labeled, w, b)
            dw, db = backward_propagation(X_train, y_train_labeled, A)
            w, b = optimize(w, b, dw, db, alpha)

        weights[i] = (w, b)
        
    return weights
        

### Predict:

In [18]:
def predict(X, weights_dict):
    result_matrix = None
    
    for i in weights_dict.keys():
        w, b = weights_dict[i]
        z = np.dot(w.T, X) + b
        A = sigmoid(z)
        
        if result_matrix is None:
            result_matrix = A.reshape((1, X.shape[1]))
        else:
            result_matrix = np.concatenate((result_matrix, A), axis=0)    
    
        output_matrix = np.where(result_matrix == np.amax(result_matrix,  axis=0))[0].reshape((1, X.shape[1]))
        
    
    return output_matrix

### Test:

In [19]:
def test(y_hat, y_test):
    predictions = np.equal(y_hat, y_test)
    accuracy = f"{round(np.sum(predictions) / predictions.size * 100, 2)} %"
    
    return accuracy    

### Run model:

In [20]:
model = train(X_train, y_train, 1000, 0.05)

In [21]:
predictions = predict(X_test, model)

In [22]:
accuracy = test(predictions, y_test)

In [23]:
accuracy

'10.27 %'