In [1]:
# library imports
import matplotlib.pyplot as plt
import numpy as np
import requests 
import gzip
import os
import random

In [2]:
files = [
     "train-images-idx3-ubyte.gz"
    ,"train-labels-idx1-ubyte.gz"
    ,"t10k-images-idx3-ubyte.gz"
    ,"t10k-labels-idx1-ubyte.gz"
]

def fetch(file_name,base_url = r"http://yann.lecun.com/exdb/mnist/"):
    url = "".join([base_url,file_name])
    
    file_path = os.path.join(os.getcwd(),file_name)
    
    if os.path.isfile(file_path):
        print('File exists.')
        with open(file_path,mode='rb') as file:
            data = file.read()
    else:
        with open(file_path,mode='wb') as file:
            print('File doesn\'t exist')
            data = requests.get(url).content
            file.write(data)
            
    return np.frombuffer(gzip.decompress(data),dtype='uint8').copy()
    

In [3]:
# get data 
X_train = fetch(files[0])[16:].reshape(-1,28,28).reshape(60000,28*28)
y_train = fetch(files[1])[8:].reshape(-1,1)
X_test = fetch(files[2])[16:].reshape(-1,28,28).reshape(10000,28*28)
y_test = fetch(files[3])[8:].reshape(-1,1)

File exists.
File exists.
File exists.
File exists.


In [4]:
# functions

def sigmoid(x):
    return 1/(1+np.exp(-x))

def predict(x,beta):
    return sigmoid(np.dot(x,beta))

def gradient_step(beta,grad,step_size=0.01):
    return beta - step_size*grad

def gradient(x,beta,y_true):
    y_pred = predict(x,beta)
    
    return np.dot(np.transpose(x),(y_pred-y_true))/len(x)

In [5]:
_X_train = np.concatenate([np.ones(shape=(len(X_train),1),dtype='uint8'),X_train/255],axis=1)
_X_test = np.concatenate([np.ones(shape=(len(X_test),1),dtype='uint8'),X_test/255],axis=1)
_X_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [6]:
betas = np.zeros(shape=(_X_train.shape[-1],len(np.unique(y_train))))
for _class in np.unique(y_train):
    betas[:,_class] = [random.random() for _ in range(len(betas))]
betas

array([[0.94900838, 0.31831055, 0.8573371 , ..., 0.89657499, 0.79285003,
        0.72976455],
       [0.31036156, 0.89264948, 0.44899933, ..., 0.36894336, 0.81299966,
        0.28844186],
       [0.53581917, 0.71427855, 0.57444364, ..., 0.96122787, 0.93122161,
        0.88932024],
       ...,
       [0.51879976, 0.91845152, 0.84439345, ..., 0.20615025, 0.9668118 ,
        0.51373147],
       [0.63399877, 0.85110701, 0.72287598, ..., 0.39927117, 0.29836196,
        0.8328765 ],
       [0.09722396, 0.49767523, 0.20932396, ..., 0.67837879, 0.06614744,
        0.57407812]])

In [7]:
epochs = 10000
for _class in np.unique(y_train):
    beta = betas[:,_class].reshape(-1,1)
    for epoch in range(epochs):
        grad = gradient(x=_X_train,beta=beta,y_true=(y_train==_class).astype('int'))
        beta = gradient_step(beta=beta,grad=grad,step_size=0.1)
    betas[:,_class] = beta.flat[:]

In [8]:
betas

array([[-3.57388359, -1.75047252, -2.99808312, ..., -1.39988536,
        -6.53871189, -3.89485813],
       [ 0.31036156,  0.89264948,  0.44899933, ...,  0.36894336,
         0.81299966,  0.28844186],
       [ 0.53581917,  0.71427855,  0.57444364, ...,  0.96122787,
         0.93122161,  0.88932024],
       ...,
       [ 0.51879976,  0.91845152,  0.84439345, ...,  0.20615025,
         0.9668118 ,  0.51373147],
       [ 0.63399877,  0.85110701,  0.72287598, ...,  0.39927117,
         0.29836196,  0.8328765 ],
       [ 0.09722396,  0.49767523,  0.20932396, ...,  0.67837879,
         0.06614744,  0.57407812]])

In [9]:
# accuracy
(np.argmax(predict(_X_train,betas),axis=1).reshape(-1,1) == y_train).mean()

0.9124333333333333

In [10]:
# accuracy
(np.argmax(predict(_X_test,betas),axis=1).reshape(-1,1) == y_test).mean()

0.9152