In [1]:
import numpy as np
from scipy.special import expit
from sklearn.utils import shuffle

In [2]:
# Training data:
data = open('mnist_train.csv', "r")
X = []
y = []
for line in data:
    label = int(line.split(',')[0].strip())
    if label == 8 or label == 1:
        y.append(0 if label == 8 else 1)
        r = line.split(',')[1:]
        x = []
        for val in r:
            x.append(int(val.strip()))
        X.append(x)
        
y = np.array(y)
X = np.array(X)
X = X/255
X_actual = X

We have a matrix X = n x m, ie, n images with m features. <br />
x<sub>i</sub> is ith row of X

In [3]:
n = X.shape[0]

max_iter = 500
alpha = 1e-3
b = np.random.rand()

act = []

cost = []
eps = 1e-6

In [4]:
def cost_func(y, a):
    one = np.ones(len(y))
    c = 0
    for i in range(len(y)):
        yi = y[i]
        ln_a1 = np.log(a[i]) if a[i] == 1 else 1e8
        ln_a2 = np.log(1 - a[i]) if a[i] != 1 and a[i] > 0 else 1e8
        c = c + yi*ln_a1 + (1 - yi)*ln_a2
    return c

# def act_func(w, xi, bias):
#     return expit(w@xi + bias)

In [5]:
X = X.T

In [6]:
'''
w is 1 by 784 matrix
xi is 784 by 1 matrix => X.T
The formula in use (slides):
    z = wT@xi + b
Here, w = [784 x 1],    in python: [ [1], [2], [3], ... [784] ]
So, wT = [1 x 784],     in python: [ 1, 2, 3, ... 784 ]
X = [ x1 x2 x3 ... xn ],        where n = 12593
in python X = [ [x1], [x2], [x3], ... [xn] ]
therefore, xi = [1 x 784] in python
According to slides:
xi = [784 x 1]
in code, xi = [1 x 784]
therefore, we have to use: xiT => xi.T in python,
hence, we use X = X.T for our code
y = [ 1, 2, 3, ... n ],         where n = 12593
In slides y = [784 x 1]
In python y = [1 x 784]
'''
wT = np.random.rand(784)     # random weights initialised
for i in range(max_iter):    
    a = expit(wT@X + b)
    '''
    The slides say:
        w = w + alpha * (ai - yi) * xi
    therefore,
        wT = wT + alpha * (ai - yi) * xiT
    '''     
    wT = wT - alpha*(a - y)@X.T
    b = b - alpha*(a - y).sum()    
    # Finding the cost
    c = cost_func(y, a)
    cost.append(c)        
    
    if i > 0:
        if abs(cost[i - 1] - cost[i]) <= eps:
            print(i)
            break

In [14]:
test = open('test.txt', 'r')
X_test = []
for line in test:
    r = line.split(',')[:]
    x_t = []
    for val in r:
        x_t.append(int(val.strip()))
    X_test.append(x_t)

X_test = np.array(X_test)
X_test = X_test/255
X_test = X_test.T

log_act = expit(wT@X_test + b)
log_pred = np.round(log_act)

In [15]:
# Question 1:
format_2f =  [ '%.2f' % ele for ele in X_actual[np.random.randint(0,X_actual.shape[0]-1,1)[0]] ]
s = ""
i = 0
for num in format_2f:
    s = s + str(num)
    i += 1
    if i != len(format_2f):
         s = s + ", "
file1 = open("q1.txt", "w") 
file1.write(s)
file1.close()


In [16]:
# Question 2:
s = ""
for w in wT:
    format_4f =  '%.4f' % w
    s = s + str(format_4f) + ", "    
s = s + str(b)
file1 = open("q2.txt", "w") 
file1.write(s)
file1.close()

In [17]:
# Question 3:
format_2f =  [ '%.2f' % ele for ele in log_act]
s = ""
i = 0
for num in format_2f:
    s = s + str(num)
    i += 1
    if i != len(format_2f):
         s = s + ", "
file1 = open("q3.txt", "w") 
file1.write(s)
file1.close()



In [18]:
# Question 4:
format_2f =  [ '%.0f' % ele for ele in log_pred]
s = ""
i = 0
for num in format_2f:
    s = s + str(num)
    i += 1
    if i != len(format_2f):
         s = s + ", "
file1 = open("q4.txt", "w") 
file1.write(s)
file1.close()

In [19]:
# import matplotlib.pyplot as plt
# for i in range(200):
#     pixels = X_test[:, i]
#     pixels = pixels.reshape((28, 28))
#     l = str(i+1) + "th pic is Label: " + str(log_pred[i])
#     plt.title(l)
#     plt.imshow(pixels, cmap='gray')
#     plt.show()


# Neural Network

In [20]:
# lr = 0.01
# hidden_units = int(X_actual.shape[1] / 2)
# num_train = len(X_actual)
# train_x = np.hstack((X_actual, np.ones(num_train).reshape(-1,1)))
# train_y = y
# input_units = train_x.shape[1]
#
# w_i_h =  np.random.uniform(low=-1, high=1, size=(hidden_units, input_units))
# w_h_o = np.random.uniform(low=-1, high=1, size=(1, hidden_units + 1))
# prev = 0
# for _ in range(max_iter):
#     out_o = np.zeros(num_train)
#     out_h = np.zeros((num_train, hidden_units+1))       # +1 for bias
#     out_h[:, -1] = 1
#     for i in range(num_train):
#         row = train_x[i]
#         z_i_h = w_i_h@row
#         out_h[i, :-1] = expit(z_i_h)
#         out_o[i] = expit((out_h[i]@w_h_o.T)[0])
#
#         diff = expit(out_h[i])@expit(1 - out_h[i])
#         delta = np.multiply(diff, (train_y[i] - out_o[i]) * np.squeeze(w_h_o))
#
#         w_i_h += lr*(np.expand_dims(delta[:-1], axis=1)@np.expand_dims(row, axis=0))
#         w_h_o += np.expand_dims(lr * (train_y[i] - out_o[i]) * out_h[i,:], axis=0)
#
#     error = cost_func(y, out_o)
#     num_correct = sum((out_o > 0.5).astype(int) == train_y)
#     print('epoch = ', _, ' error = {:.7}'.format(error), 'correctly classified = {:.4%}'.format(num_correct / num_train))
#     if _ > 1 and abs(error - prev) <= eps:
#         print("done")
#         break
#     prev = error



In [12]:
input_units = X_actual.shape[1]
hidden_units = int(input_units / 2)
w_1 = np.random.uniform(-1,1,(hidden_units, input_units))
w_j2 = np.random.uniform(-1,1,(1, hidden_units))  #wj2
prev_error = 0
bias_h = np.ones(hidden_units).reshape((392,1))
bias_o = 1
Cb1 = np.zeros((392,1))
Cb2 = 0
Cw1 = np.zeros((hidden_units, input_units))
Cw2 = np.zeros((1, 392))
for _ in range(max_iter):
    print("i: =", _)
    X = X_actual
    y_rand = y
    act = []
    X, y_rand = shuffle(X, y_rand)
    for i in range(len(X)):
        xi = X[i].reshape((X.shape[1], 1))
        z_i_h = w_1@xi + bias_h
        aij = expit(z_i_h)
        z_h_o = w_j2@aij + bias_o
        ai = expit(z_h_o)[0][0]
        yi = y[i]

        Cb2 = np.squeeze((ai - yi)*ai*(1-ai))
        Cw2 = Cb2*aij
        Cb1 = np.multiply(np.multiply(Cb2*w_j2.T,aij),(1 - aij))
        Cw1 = Cb1@xi.T

        w_1 = w_1 - alpha*Cw1
        bias_h = bias_h - alpha*Cb1
        w_j2 = w_j2 - alpha*Cw2.T
        bias_o = bias_o - alpha*Cb2

        act.append(ai)

    C = cost_func(y_rand, act)
    if _ > 1 and abs(C - prev_error) <= eps:
        print(_)
        break
    print("C: ", C)
    print("prev: ", prev_error)
    acc = sum((np.array(act) > 0.5).astype(int) == y_rand)
    print("Accuracy: ", acc/len(y_rand))
    prev_error = C

i: = 0


KeyboardInterrupt: 

In [22]:
nn_act = []

for i in range(len(X_test)):
    xi = X_test[i].reshape((X_test.shape[0], 1))
    z_i_h = w_1@xi + bias_h
    aij = expit(z_i_h)
    z_h_o = w_j2@aij + bias_o
    ai = expit(z_h_o)[0][0]
    nn_act.append(ai)

nn_pred = np.round(nn_act)

ValueError: cannot reshape array of size 200 into shape (784,1)

In [None]:
t = np.array([[1,11,111],[2,22,222],[3,33,333],[4,44,444]])
#train_x = np.hstack((t, np.ones(len(t)).reshape(-1,1)))
# temp2 = np.array([1,2,3])
#temp, temp2 = shuffle(temp, temp2)
print(t)
print(t[0, :-1], "\n\n")

In [None]:
t2 = np.squeeze(np.array([1]))
print(t2)

In [None]:
print(nn_pred)
