In [140]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy.sparse import coo_matrix
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import copy
import pickle 
# Load the MATLAB data file; it provides the feature matrix under 'X' and the
# labels under 'y'.
data = loadmat('ex3data1.mat')

# Handy while exploring: data.keys(), data['X'], data['y'] and their lengths.

X = data['X']
Y = data['y']

# To eyeball one sample: plt.imshow(np.reshape(X[0], (20, 20))); plt.show()

# Remap label 10 to 0 in place (presumably the file stores the digit zero as
# class 10 -- confirm against the data set description). Every other label is
# left as it is.
Y[Y == 10] = 0

# Flatten the label column into a plain list of scalars, one per sample.
labels = [row[0] for row in Y]

# Sparse copy of the features; it is shuffled together with X below but is not
# used by any of the cells visible here.
X_sparse = coo_matrix(X)

# Shuffle features and labels with the same fixed seed so rows stay aligned.
X, X_sparse, labels = shuffle(X, X_sparse, labels, random_state=0)

# Hold out 20% of the samples for testing ...
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.2, random_state=0)

# ... and 20% of the remaining training samples for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0)


In [141]:
# Quick check of the shape of a single training sample (a flat feature vector).
X_train[0].shape

(400,)

In [174]:
# Training hyperparameters used by the training loop further down.
# NOTE(review): the recorded output of the training cell reports "epoch: n/50",
# so it was presumably produced with a different `epochs` value -- re-run or
# reconcile before trusting those numbers.
epochs = 2000  # number of full passes over the training set
lr = 1e-4  # step size applied to every gradient in the parameter update

In [175]:
class Linear:
    """Fully connected layer computing ``w @ x + b`` on column inputs.

    ``x`` is expected with shape ``(input_dim, n)``, one example per column.
    The notebook feeds a single column (``n == 1``), but the layer also
    handles several columns at once.

    Attributes:
        w: weight matrix of shape ``(output_dim, input_dim)``, drawn from a
            standard normal distribution.
        b: bias column of shape ``(output_dim, 1)``, drawn the same way.
        x: input cached by the most recent ``forward`` call.
        dw: gradient of the loss w.r.t. ``w`` from the last ``backward``.
        db: gradient of the loss w.r.t. ``b`` from the last ``backward``.
    """

    def __init__(self, input_dim, output_dim):
        self.w = np.random.randn(output_dim, input_dim)
        self.b = np.random.randn(output_dim, 1)
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` for the backward pass and return ``w @ x + b``."""
        self.x = x
        return np.dot(self.w, x) + self.b

    def backward(self, dout):
        """Store the parameter gradients and return the gradient w.r.t. ``x``.

        Args:
            dout: upstream gradient of shape ``(output_dim, n)``.

        Returns:
            Gradient w.r.t. the cached input, shape ``(input_dim, n)``.
        """
        self.dw = np.dot(dout, self.x.T)
        # Sum over the example axis so db always has the shape of b. For a
        # single column this equals dout, but as a fresh array, so later
        # in-place edits of dout cannot leak into the stored gradient.
        self.db = np.sum(dout, axis=1, keepdims=True)
        return np.dot(self.w.T, dout)

In [176]:
# only for one example
class ReLU:
    """Element-wise rectified linear unit.

    Attributes:
        mask: boolean array marking the inputs of the last ``forward`` call
            that were ``<= 0`` (those positions output zero and pass no
            gradient).
    """

    def __init__(self):
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every non-positive entry set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient zeroed where the input was <= 0.

        Works on a copy so the caller's array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [177]:
# only for one example
class softmax:
    """Softmax activation combined with a cross-entropy loss (one example).

    Attributes:
        p: probabilities computed by the last ``forward`` call.
        t: target passed to the last ``forward`` call.
    """

    def __init__(self):
        self.p = None
        self.t = None

    def forward(self, a, t):
        """Store the softmax of ``a`` and return the cross-entropy against ``t``.

        The maximum of ``a`` is subtracted before exponentiating so large
        scores do not overflow, and 1e-5 is added inside the logarithm so a
        zero probability does not produce ``-inf``.
        """
        self.t = t

        exponentials = np.exp(a - np.max(a))
        self.p = exponentials / np.sum(exponentials)

        log_p = np.log(self.p + 1e-5)
        return np.sum(-log_p * t)

    def backward(self):
        """Return the gradient of the loss w.r.t. the scores: ``p - t``."""
        return self.p - self.t

In [178]:
# Layer stack: 400 -> 300 -> 200 -> 100 -> 10, with a ReLU after each of the
# first three linear layers and the softmax loss layer at the end.
layer_1 = Linear(400, 300)
layer_2 = ReLU()
layer_3 = Linear(300, 200)
layer_4 = ReLU()
layer_5 = Linear(200, 100)
layer_6 = ReLU()
layer_7 = Linear(100, 10)
layer_8 = softmax()

model = [
    layer_1, layer_2,
    layer_3, layer_4,
    layer_5, layer_6,
    layer_7,
    layer_8,
]

# Show the assembled stack, one layer per line.
for layer in model:
    print(layer)

<__main__.Linear object at 0x000002641A3D3048>
<__main__.ReLU object at 0x000002641A3E4D08>
<__main__.Linear object at 0x000002641A197308>
<__main__.ReLU object at 0x000002641A3CFDC8>
<__main__.Linear object at 0x000002641A3D3208>
<__main__.ReLU object at 0x000002641A3D3288>
<__main__.Linear object at 0x000002641A3D3488>
<__main__.softmax object at 0x000002641A3D3708>


In [179]:
def MakeOneHot(class_num, x):
    """Return a ``(class_num, 1)`` column of zeros with a 1 at index ``x``."""
    encoded = np.zeros(class_num)
    encoded[x] = 1
    column = encoded.reshape(class_num, -1)
    return column

In [180]:
class DNN:
    def __init__(self, model):
        self.model = model
    
    def forward(self, x, y):
        for layer in self.model[0:-1]:
            x = layer.forward(x)
        
        x = model[-1].forward(x, y)
        return x
    
    def backward(self):
        dout = model[-1].backward()
        
        index = len(model) - 2
        while index >= 0:
            dout = model[index].backward(dout)
            index -= 1

In [182]:
net = DNN(model=model)
# loss = net.forward(X_train[0].reshape(-1, 1), MakeOneHot(10, y_train[0]))
# net.backward()
# print(loss)

best_model = None
best_acc = -1
for epoch in range(epochs):
    loss = 0
    for i in range(len(X_train)):

        loss += net.forward(X_train[i].reshape(-1, 1), MakeOneHot(10, y_train[i]))
        net.backward()
        
        # update parameters
        index = len(net.model) - 2
        while index >= 0:
            net.model[index].w -= lr * net.model[index].dw
            net.model[index].b -= lr * net.model[index].db
            index -= 2
        
    print("epoch: {}/{} loss: {}".format(epoch + 1, epochs, loss / len(X_train)))

    valid_correct = 0
    for i in range(len(X_valid)):
        net.forward(X_valid[i].reshape(-1, 1), MakeOneHot(10, y_valid[i]))

        if np.argmax(net.model[-1].p) == y_valid[i]:
            valid_correct += 1
    
    acc = valid_correct / len(X_valid)
    print("Valid Acc: {}".format(acc))
    if best_acc < acc:
        best_acc = acc
        best_model = copy.deepcopy(model)
        print("get new model!")

test_net = DNN(best_model)
test_correct = 0
for i in range(len(X_test)):
    test_net.forward(X_test[i].reshape(-1, 1), MakeOneHot(10, y_test[i]))

    if np.argmax(net.model[-1].p) == y_test[i]:
        test_correct += 1
print("Test Acc: {}".format(test_correct / len(X_test)))

epoch: 1/50 loss: 1.181942048163183
Valid Acc: 0.505
get new model!
epoch: 2/50 loss: 1.176585631509903
Valid Acc: 0.51625
get new model!
epoch: 3/50 loss: 1.2037518028077903
Valid Acc: 0.51375
epoch: 4/50 loss: 1.160296784306244
Valid Acc: 0.51375
epoch: 5/50 loss: 1.1613318162199198
Valid Acc: 0.51375
epoch: 6/50 loss: 1.1534894684830612
Valid Acc: 0.5175
get new model!
epoch: 7/50 loss: 1.1480975550823755
Valid Acc: 0.5175
epoch: 8/50 loss: 1.1374665170799505
Valid Acc: 0.5225
get new model!
epoch: 9/50 loss: 1.1367667048555958
Valid Acc: 0.52375
get new model!
epoch: 10/50 loss: 1.1189000632342323
Valid Acc: 0.52625
get new model!
epoch: 11/50 loss: 1.1219636500526522
Valid Acc: 0.5375
get new model!
epoch: 12/50 loss: 1.110218748068715
Valid Acc: 0.52875
epoch: 13/50 loss: 1.1181647138715392
Valid Acc: 0.5225
epoch: 14/50 loss: 1.0962171039602133
Valid Acc: 0.525
epoch: 15/50 loss: 1.1000396544959856
Valid Acc: 0.5275
epoch: 16/50 loss: 1.0866638721884998
Valid Acc: 0.52875
epoch:

KeyboardInterrupt: 