In [None]:
import numpy as np
import csv

def loadCSV(filename):
    """Load a purely numeric CSV file into a float array.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file. Every field must be parseable by ``float``.

    Returns
    -------
    numpy.ndarray
        One row per non-blank line of the file, holding ``float64`` values.

    Raises
    ------
    ValueError
        If a field cannot be converted to ``float``.
    """
    # The csv module asks for newline="" so that it can interpret line
    # endings (and newlines embedded in quoted fields) itself.
    with open(filename, "r", newline="") as csvfile:
        # csv.reader yields an empty list for a blank line; skipping those
        # keeps the result rectangular instead of producing a ragged array.
        dataset = [[float(x) for x in row] for row in csv.reader(csvfile) if row]
    return np.array(dataset)

In [None]:
def sigmoid(x):
    """Evaluate the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s); converted to ``float64`` before evaluation.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Value(s) in ``[0, 1]`` with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=np.float64)
    # For large negative x, exp(-x) overflows to inf and the quotient is the
    # correct limit 0.0, so only that benign overflow warning is silenced.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-x))

def cost_function(X, Y, theta, lmbda):
    """Regularised logistic-regression cost and its gradient.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix with one sample per row. ``theta[0]`` is left out of
        the penalty, so column 0 is presumably the bias column of ones.
    Y : numpy.ndarray
        Binary targets (0 or 1), one per row of ``X``.
    theta : numpy.ndarray
        Parameter vector with one entry per column of ``X``.
    lmbda : float
        L2 regularisation strength.

    Returns
    -------
    list
        ``[J, grad]`` where ``J`` is the scalar cost and ``grad`` is the
        gradient of ``J`` with respect to ``theta``.

    Notes
    -----
    The cost is printed on every call.
    """
    m = len(X)
    z = X @ theta
    h = sigmoid(z)

    # -log(h) == log(1 + exp(-z)) and -log(1 - h) == log(1 + exp(z)).
    # Evaluating them with logaddexp stays finite when the sigmoid saturates
    # to exactly 0.0 or 1.0, where log(h) / log(1 - h) would give -inf or nan.
    data_term = (1 / m) * np.sum(
        Y * np.logaddexp(0, -z) + (1 - Y) * np.logaddexp(0, z)
    )
    penalty = (lmbda / (2 * m)) * np.sum(theta[1:] * theta[1:])
    J = data_term + penalty
    print("J : ",J)

    grad = (1 / m) * np.dot(np.transpose(X), h - Y)
    # The penalty contributes (lmbda / m) * theta[j] for every j >= 1; without
    # this term the gradient would not be the derivative of the cost above.
    grad[1:] = grad[1:] + (lmbda / m) * theta[1:]
    return [J, grad]

def gradient_descent(X, Y, theta, lr, conv, lmbda, max_iter=None):
    """Fit one parameter vector by batch gradient descent.

    Repeats ``theta <- theta - lr * grad`` until two successive costs
    returned by ``cost_function`` differ by less than ``conv``.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Training inputs and targets, passed through to ``cost_function``.
    theta : numpy.ndarray
        Starting parameters. The array itself is not modified; every update
        creates a new array.
    lr : float
        Learning rate (step size).
    conv : float
        Convergence threshold on the absolute change of the cost.
    lmbda : float
        Regularisation strength, passed through to ``cost_function``.
    max_iter : int or None, optional
        Upper bound on the number of updates. ``None`` (the default) means
        no bound, so the loop runs until the convergence test is met.

    Returns
    -------
    numpy.ndarray
        The parameters after the last update.

    Notes
    -----
    The change in cost and the threshold are printed after every update.
    """
    current, grad = cost_function(X, Y, theta, lmbda)
    # The first comparison is made against 0.0, so no update is performed
    # when the starting cost is already smaller than `conv`.
    prev = 0.0
    steps = 0
    # A NaN cost makes the comparison False and therefore also ends the loop.
    while abs(current - prev) >= conv:
        if max_iter is not None and steps >= max_iter:
            break
        theta = theta - (lr * grad)
        prev = current
        current, grad = cost_function(X, Y, theta, lmbda)
        steps += 1
        print(abs(current - prev) )
        print(conv)
    return theta

def one_vs_all(X, Y, Num_labels, lmbda=0, lr=0.000001, conv=0.000001):
    """Train one binary logistic-regression classifier per label.

    For every label ``i`` in ``range(Num_labels)`` the targets are turned
    into 1 (sample has label ``i``) or 0 (any other label) and a parameter
    vector is fitted with ``gradient_descent``, starting from zeros.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Training inputs, one sample per row.
    Y : numpy.ndarray
        Label of every sample, compared against ``0 .. Num_labels - 1``.
    Num_labels : int
        Number of classes.
    lmbda : float, optional
        Regularisation strength (default 0, i.e. no regularisation).
    lr : float, optional
        Learning rate (default 1e-6).
    conv : float, optional
        Convergence threshold on the cost change (default 1e-6).

    Returns
    -------
    numpy.ndarray, shape (Num_labels, n_columns_of_X)
        Row ``i`` holds the parameters of the classifier for label ``i``.
    """
    _, n = np.shape(X)
    all_theta = np.zeros((Num_labels, n), dtype=np.float64)
    print("Training\n")
    for i in range(Num_labels):
        # 1 where the sample belongs to class i, 0 everywhere else.
        Y_one = (Y == i) * 1
        all_theta[i, :] = gradient_descent(
            X, Y_one, all_theta[i, :], lr, conv, lmbda
        )
        print("Trained ",i)
    return all_theta

def predict(X,Y,all_theta, Num_labels):
    """Pick, for every sample, the class whose classifier scores highest.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Inputs, laid out like the matrix the parameters were trained on.
    Y : any
        Unused; kept so existing calls keep working.
    all_theta : numpy.ndarray, shape (n_classes, n_features)
        One parameter vector per class.
    Num_labels : int
        Unused; the number of classes is taken from ``all_theta``.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Index of the winning class for every sample.
    """
    # The sigmoid is monotonic, so the class with the largest linear score is
    # also the class with the largest probability. Ranking the raw scores
    # avoids ties between classes whose sigmoid has saturated to exactly 1.0,
    # which argmax would otherwise resolve in favour of the lowest index.
    scores = X @ np.transpose(all_theta)
    return scores.argmax(axis=1)

def accuracy(pred,Y):
    """Print and return the percentage of predictions equal to the labels.

    Parameters
    ----------
    pred : array-like
        Predicted labels.
    Y : array-like
        True labels, with as many elements as ``pred``. Both inputs are
        flattened first, so a column vector of labels is accepted.

    Returns
    -------
    numpy.float64
        Accuracy in percent (0 to 100).

    Raises
    ------
    ValueError
        If the inputs are empty or contain different numbers of elements.
    """
    # Flatten first: comparing an (N, 1) array with an (N,) array would
    # broadcast to an N x N matrix and give a meaningless percentage.
    pred = np.ravel(pred)
    Y = np.ravel(Y)
    if pred.size != Y.size:
        raise ValueError(
            "pred and Y must have the same number of elements "
            "(got %d and %d)" % (pred.size, Y.size)
        )
    if pred.size == 0:
        raise ValueError("accuracy is undefined for empty input")

    matches = (pred == Y) * 1
    acc = (np.sum(matches) / len(matches)) * 100
    print("Accuracy is: ",acc,"\n")
    return acc

In [None]:
# Column 0 of every CSV row is used as the label, the remaining columns as
# the input features.
train = loadCSV("sample_data/mnist_train_small.csv")
test = loadCSV("sample_data/mnist_test.csv")

N_train = len(train)
N_test = len(test)
Y_train = train[:, 0]
Y_test = test[:, 0]

print(N_train)
print(N_test)

# Prepend a column of ones so that theta[0] acts as the intercept.
o_train = np.ones((N_train, 1), dtype=np.float64)
o_test = np.ones((N_test, 1), dtype=np.float64)
X_train = np.concatenate((o_train, train[:, 1:]), axis=1)
X_test = np.concatenate((o_test, test[:, 1:]), axis=1)

Num_labels = 10
# Number of columns the model sees (bias + features). The label column is
# not a feature, so count the columns of X_train rather than those of `train`.
Num_features = X_train.shape[1]

# Load previously trained parameters; use the commented line to retrain.
all_theta = loadCSV("sample_data/alltheta.csv")
# all_theta = one_vs_all(X_train,Y_train,Num_labels)

In [None]:
# Length of a single classifier's parameter vector (first row of all_theta).
all_theta[0].shape

(785,)

In [None]:
from google.colab import drive
# Mount Google Drive so that files below /content/drive/ can be read.
drive.mount("/content/drive/")

Mounted at /content/drive/


In [None]:
# Replace the parameters in memory with the set stored on Google Drive.
all_theta = loadCSV(
    "/content/drive/My Drive/MNIST-params/models/allthetanew.csv"
)

In [None]:
def create_op_vec(i, num_classes=10):
    """Build a one-hot column vector with a 1 at position ``i``.

    Parameters
    ----------
    i : int or whole-number float
        Position to set. Whole-number floats such as ``3.0`` are accepted,
        because labels parsed from CSV arrive as floats.
    num_classes : int, optional
        Length of the vector (default 10).

    Returns
    -------
    numpy.ndarray, shape (num_classes, 1)
        All zeros except for a single 1.0 at row ``i``.

    Raises
    ------
    ValueError
        If ``i`` is not a whole number.
    IndexError
        If ``i`` lies outside the vector.
    """
    idx = int(i)
    # Refuse values like 3.5 instead of silently truncating them.
    if idx != i:
        raise ValueError("class index must be a whole number, got %r" % (i,))
    op = np.zeros((num_classes, 1))
    op[idx] = 1
    return op

def oneHot(inp):
    """One-hot encode every label in ``inp``.

    Each label is converted with ``create_op_vec`` and the resulting column
    vectors are stacked, in input order, into a single array.
    """
    return np.array([create_op_vec(label) for label in inp])

In [None]:
from tensorflow import keras
import numpy as np
# Fetch the MNIST train / test split through Keras.
handwriting_mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = handwriting_mnist.load_data()

# Flatten every image into 784 values.
# NOTE(review): only the training matrix is transposed (784 rows, one column
# per image) while the test matrix keeps one image per row — confirm that
# this asymmetry is intended.
train_images = train_images.reshape(len(train_images), 784).T
test_images = test_images.reshape(len(test_images), 784)

In [None]:
# Prepend a column of ones (the bias input) to the flattened test images.
o_test = np.ones((len(test_images), 1), dtype=np.float64)
# Only do so while the matrix still has the 784 raw pixel columns, so that
# re-running this cell does not stack a second bias column on top.
if test_images.shape[1] == 784:
    test_images = np.concatenate((o_test, test_images), axis=1)
test_images.shape

(10000, 785)

In [None]:
# Classify the Keras test images with the loaded parameters and score them.
pred2 = predict(X=test_images, Y=test_labels, all_theta=all_theta, Num_labels=10)
acc2 = accuracy(pred=pred2, Y=test_labels)

Accuracy is:  91.07 



In [None]:
from sklearn import metrics
# Per-class precision, recall and F1 score, printed with four decimals.
print(
    metrics.classification_report(y_true=test_labels, y_pred=pred2, digits=4)
)

              precision    recall  f1-score   support

           0     0.9457    0.9776    0.9614       980
           1     0.9592    0.9736    0.9663      1135
           2     0.9271    0.8750    0.9003      1032
           3     0.8952    0.9050    0.9000      1010
           4     0.9070    0.9236    0.9152       982
           5     0.9105    0.8330    0.8700       892
           6     0.9276    0.9499    0.9386       958
           7     0.9140    0.9095    0.9118      1028
           8     0.8325    0.8778    0.8546       974
           9     0.8832    0.8692    0.8761      1009

    accuracy                         0.9107     10000
   macro avg     0.9102    0.9094    0.9094     10000
weighted avg     0.9110    0.9107    0.9105     10000



In [None]:
# Write the parameters currently in memory as comma-separated text.
# NOTE(review): all_theta was last assigned from the Drive copy, so this
# overwrites the sample_data file that was read earlier — confirm intended.
np.savetxt(fname="sample_data/alltheta.csv", X=all_theta, delimiter=",")