In [1]:
# library imports
import matplotlib.pyplot as plt
import numpy as np
import requests 
import gzip
import os
import random

In [2]:
# MNIST archive names, in the order they are consumed below:
# train images, train labels, test images, test labels.
files = [
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
]

def fetch(file_name,base_url = r"http://yann.lecun.com/exdb/mnist/"):
    """Return the decompressed bytes of a gzip archive as a writable uint8 array.

    The archive is read from ``file_name`` in the current working directory
    when it is already there; otherwise it is downloaded from
    ``base_url + file_name`` and cached under that name for later calls.

    Parameters
    ----------
    file_name : str
        Name of the gzip archive (also used as the local cache file name).
    base_url : str
        URL prefix that ``file_name`` is appended to for the download.

    Returns
    -------
    numpy.ndarray
        1-D ``uint8`` array holding every byte of the decompressed archive.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    OSError, EOFError, zlib.error
        If the payload (cached or downloaded) is not valid gzip data.
    """
    url = "".join([base_url,file_name])
    file_path = os.path.join(os.getcwd(),file_name)

    if os.path.isfile(file_path):
        print('File exists.')
        with open(file_path,mode='rb') as file:
            data = file.read()
        raw = gzip.decompress(data)
    else:
        print('File doesn\'t exist')
        response = requests.get(url)
        # Fail loudly on 4xx/5xx instead of caching an error page.
        response.raise_for_status()
        data = response.content
        # Decompress BEFORE touching the disk: if the download is broken we
        # raise here and no empty/corrupt cache file is left behind to make
        # every later call take the "File exists." branch and crash.
        raw = gzip.decompress(data)
        with open(file_path,mode='wb') as file:
            file.write(data)

    # frombuffer yields a read-only view over the bytes; copy to make it writable.
    return np.frombuffer(raw,dtype='uint8').copy()
    

In [3]:
# get data
# Skip the leading header bytes (16 for the image files, 8 for the label
# files), flatten every 28x28 image into one 784-value row, and turn the
# labels into a single column.
X_train = fetch(files[0])[16:].reshape(60000, 28 * 28)
y_train = fetch(files[1])[8:][:, np.newaxis]
X_test = fetch(files[2])[16:].reshape(10000, 28 * 28)
y_test = fetch(files[3])[8:][:, np.newaxis]

File exists.
File exists.
File exists.
File exists.


In [4]:
# functions

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like of numbers
        Input value(s). Integer/boolean input is promoted to float64;
        floating input keeps its dtype.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        x = x.astype(np.float64)

    # exp(-|x|) lies in (0, 1], so it can never overflow, unlike exp(-x)
    # for very negative x (which emitted a RuntimeWarning before).
    decay = np.exp(-np.abs(x))

    # x >= 0:  1 / (1 + exp(-x))
    # x <  0:  exp(x) / (1 + exp(x))   -- the same value, rewritten
    numerator = np.where(x >= 0, np.ones_like(decay), decay)
    out = numerator / (1 + decay)

    # Indexing with () unwraps 0-d results to a scalar and leaves arrays as-is.
    return out[()]

def predict(x,beta):
    """Return sigmoid(x . beta): the model's output for every row of ``x``.

    ``x`` and ``beta`` are combined with ``np.dot``, so ``beta`` may be a
    single coefficient column or a matrix with one column per class.
    """
    logits = np.dot(x, beta)
    return sigmoid(logits)

def gradient_step(beta,grad,step_size=0.01):
    """Take one gradient-descent step: move ``beta`` against ``grad``.

    Returns ``beta - step_size * grad`` as a new value; ``beta`` itself is
    not modified.
    """
    update = step_size * grad
    return beta - update

def gradient(x,beta,y_true):
    """Return x^T (predict(x, beta) - y_true) divided by the number of rows.

    This is the gradient of the mean logistic (cross-entropy) loss with
    respect to ``beta``, averaged over the ``len(x)`` rows of ``x``.
    """
    residual = predict(x, beta) - y_true
    n_rows = len(x)
    return np.dot(np.transpose(x), residual) / n_rows

In [9]:
# Scale the uint8 pixels to [0, 1] and prepend a constant-1 column so the
# first coefficient of each model acts as the intercept.
_X_train = np.hstack((np.ones((len(X_train), 1), dtype='uint8'), X_train / 255))
_X_test = np.hstack((np.ones((len(X_test), 1), dtype='uint8'), X_test / 255))
_X_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [10]:
# Seed the generator so a fresh "Restart & Run All" starts from the same
# weights every time; without it each run begins from different values.
random.seed(0)

# One row per feature (intercept column included) and one column per class;
# every entry starts as a uniform draw from [0, 1).
betas = np.zeros(shape=(_X_train.shape[-1],len(np.unique(y_train))))
for _class in np.unique(y_train):
    betas[:,_class] = [random.random() for _ in range(len(betas))]
betas

array([[0.3637683 , 0.24411957, 0.30329677, ..., 0.69869893, 0.03057352,
        0.06731253],
       [0.11146246, 0.41700513, 0.79937181, ..., 0.82697239, 0.02409663,
        0.88215078],
       [0.70372545, 0.45007404, 0.3754152 , ..., 0.3879104 , 0.82390822,
        0.70806019],
       ...,
       [0.14313535, 0.62793024, 0.11809129, ..., 0.16679237, 0.71647557,
        0.12507765],
       [0.11275735, 0.4041311 , 0.96240302, ..., 0.98106132, 0.67491007,
        0.96409064],
       [0.88198903, 0.9766257 , 0.44267047, ..., 0.49070328, 0.38218901,
        0.62466035]])

In [11]:
# One-vs-rest training: for each class, fit an independent binary logistic
# regression with full-batch gradient descent and write the result back into
# that class's column of `betas`.
# NOTE: `betas` is updated in place, so re-running this cell continues
# training from the current weights rather than starting over.
epochs = 10000
for _class in np.unique(y_train):
    # The 0/1 target for this class is the same on every epoch, so build it
    # once here instead of once per gradient evaluation.
    is_class = (y_train==_class).astype('int')
    beta = betas[:,_class].reshape(-1,1)
    for epoch in range(epochs):
        grad = gradient(x=_X_train,beta=beta,y_true=is_class)
        beta = gradient_step(beta=beta,grad=grad,step_size=0.1)
    betas[:,_class] = beta.flat[:]

In [12]:
# Coefficients after the training loop (one column per class).
betas

array([[-3.7962427 , -1.79900569, -3.16364639, ..., -1.54495567,
        -6.62816593, -4.01619244],
       [ 0.11146246,  0.41700513,  0.79937181, ...,  0.82697239,
         0.02409663,  0.88215078],
       [ 0.70372545,  0.45007404,  0.3754152 , ...,  0.3879104 ,
         0.82390822,  0.70806019],
       ...,
       [ 0.14313535,  0.62793024,  0.11809129, ...,  0.16679237,
         0.71647557,  0.12507765],
       [ 0.11275735,  0.4041311 ,  0.96240302, ...,  0.98106132,
         0.67491007,  0.96409064],
       [ 0.88198903,  0.9766257 ,  0.44267047, ...,  0.49070328,
         0.38218901,  0.62466035]])

In [13]:
# accuracy on the training set: share of rows whose highest-scoring class
# matches the label
np.mean(predict(_X_train, betas).argmax(axis=1)[:, np.newaxis] == y_train)

0.9127666666666666

In [14]:
# accuracy on the held-out test set: share of rows whose highest-scoring
# class matches the label
np.mean(predict(_X_test, betas).argmax(axis=1)[:, np.newaxis] == y_test)

0.9136