In [1]:
import numpy as np 
from neuralnetwork import NeuralNetwork
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets

In [2]:
# Load scikit-learn's bundled digits dataset (the small "MNIST sample") and
# apply min/max scaling so every feature value lies in the range [0, 1].
print("[INFO] loading MNIST (sample) dataset...")
digits = datasets.load_digits()
data = digits.data.astype("float")
data = (data - data.min()) / (data.max() - data.min())
print("[INFO] samples: {}, dim: {}".format(data.shape[0],
    data.shape[1]))
(trainX, testX, trainY, testY) = train_test_split(data,
    digits.target, test_size = 0.25)

# Convert labels from integers to one-hot vectors.
# The binarizer is fitted on the training labels only and then reused for the
# test labels, so both splits share the same class-to-column mapping. Fitting
# a second binarizer on the test labels would silently produce a different
# encoding whenever a class happens to be absent from the test split.
label_binarizer = LabelBinarizer()
trainY = label_binarizer.fit_transform(trainY)
testY = label_binarizer.transform(testY)

[INFO] loading MNIST (sample) dataset...
[INFO] samples: 1797, dim: 64


In [65]:

# Layer widths: the input width comes from the training data, followed by two
# hidden layers (32 and 16 units) and a 10-unit output layer.
layers = [trainX.shape[1]] + [32, 16, 10]
# Step size applied to every weight update.
alpha = 1e-2
# Container for the weight matrices; populated by the initialisation loop.
W = list()

In [66]:
# Start from an empty list so that re-running this cell rebuilds the weights
# instead of appending a second set of matrices to an already-populated W.
W = []

# Every layer except the last pair: both sides carry an extra "+1" entry so
# the bias is learned as an ordinary weight. Each matrix is scaled by the
# square root of the number of units feeding into it.
for i in range(len(layers) - 2):
    w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
    W.append(w / np.sqrt(layers[i]))

# The last two layers are a special case where the input connections need a
# bias term but the output does not.
w = np.random.randn(layers[-2] + 1, layers[-1])
W.append(w / np.sqrt(layers[-2]))

# Report the shape of every weight matrix as a sanity check.
for i in range(len(W)):
    print(np.shape(W[i]))

(65, 33)
(33, 17)
(17, 10)


In [53]:
# Scratch checks on the network dimensions. Only the final expression is
# echoed by the notebook; the shape lookup on the line before it is evaluated
# but its value is not displayed.
W[1].shape
len(layers)

4

In [67]:
# Draw a fresh random matrix with the dimensions used for the second weight
# matrix and display its shape. Note that this rebinds the global name `w`.
w = np.random.randn(layers[1] + 1, layers[2] + 1)
w.shape

(33, 17)

In [68]:
# Width of the first hidden layer plus one; this is the row count used when
# drawing the second weight matrix.
layers[1]+1

33

In [69]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is evaluated with NumPy, so ``x`` may be a scalar or an
    array; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator
def sigmoid_deriv(x):
    """Return ``x * (1 - x)``.

    This equals the derivative of the sigmoid only when ``x`` is already a
    sigmoid output, which is how the notebook uses it: it is always called on
    stored activations, never on raw pre-activation values.
    """
    complement = 1 - x
    return x * complement

In [70]:

# Append a constant column of ones to the training features so that the bias
# can be treated as one more row of the first weight matrix.
X = np.column_stack((trainX, np.ones(trainX.shape[0])))
# The activation list starts with the (at least 2-D) input matrix itself.
A = [np.atleast_2d(X)]

In [73]:
# Keep only the input entry of A. Without this, re-running the cell would
# append another full set of activations, and the backward pass (which
# indexes layers via len(A)) would no longer line up with W.
del A[1:]

# Forward pass: feed the previous activation through each weight matrix and
# squash the result with the sigmoid, storing every layer's output in A.
for l in range(len(W)):
    net = A[l].dot(W[l])
    out = sigmoid(net)
    A.append(out)

In [74]:
# Number of stored activations: the input matrix plus one entry appended per
# weight matrix by the forward pass.
len(A)

4

In [77]:
# Difference between the final layer's activations and the one-hot targets.
error = np.subtract(A[-1], trainY)

In [78]:
# Start the delta list with the output layer's delta: the error scaled
# element-wise by the sigmoid slope evaluated at the output activations.
D = [sigmoid_deriv(A[-1]) * error]

In [79]:
# Display the delta list; at this point it holds only the output-layer delta.
D

[array([[ 0.14501224,  0.14757654, -0.13393806, ...,  0.14668454,
          0.14792611,  0.07198487],
        [ 0.14494706,  0.14758896,  0.11564581, ..., -0.08883904,
          0.1477664 ,  0.07463203],
        [ 0.1450325 ,  0.14760551,  0.11572912, ...,  0.14695423,
          0.14781795,  0.07065635],
        ...,
        [ 0.14480456,  0.14767138,  0.11560963, ...,  0.14683944,
          0.14765231,  0.07249982],
        [ 0.14502326,  0.14745636,  0.11563113, ...,  0.14660575,
         -0.07828061,  0.0714121 ],
        [ 0.14533724,  0.14753704,  0.11665479, ...,  0.1465842 ,
          0.14787212,  0.07006298]])]

In [80]:
# Backward pass: walk from the last hidden layer down to the first one
# (index 0 is the input and is skipped).
for l in range(len(A) - 2, 0, -1):
    # Push the most recent delta back through this layer's weights, then scale
    # it element-wise by the sigmoid slope at this layer's activations.
    delta = np.dot(D[-1], W[l].T) * sigmoid_deriv(A[l])
    D.append(delta)

In [81]:
# Display the delta list after the backward loop; entries are still ordered
# from the output layer towards the first hidden layer.
D

[array([[ 0.14501224,  0.14757654, -0.13393806, ...,  0.14668454,
          0.14792611,  0.07198487],
        [ 0.14494706,  0.14758896,  0.11564581, ..., -0.08883904,
          0.1477664 ,  0.07463203],
        [ 0.1450325 ,  0.14760551,  0.11572912, ...,  0.14695423,
          0.14781795,  0.07065635],
        ...,
        [ 0.14480456,  0.14767138,  0.11560963, ...,  0.14683944,
          0.14765231,  0.07249982],
        [ 0.14502326,  0.14745636,  0.11563113, ...,  0.14660575,
         -0.07828061,  0.0714121 ],
        [ 0.14533724,  0.14753704,  0.11665479, ...,  0.1465842 ,
          0.14787212,  0.07006298]]),
 array([[ 0.00868649, -0.04840841, -0.02683441, ...,  0.06106463,
          0.00299631,  0.01281303],
        [-0.02901593, -0.01328831,  0.03299212, ...,  0.03296133,
         -0.00606459,  0.00337164],
        [-0.00524183, -0.02234621,  0.01691591, ...,  0.01473409,
         -0.00826426,  0.02473794],
        ...,
        [-0.00542826, -0.02236605,  0.01668727, ...,  

In [82]:
# The deltas were collected from the output layer backwards; flip them so
# that D[l] lines up with W[l]. Running this cell a second time would flip
# the list back again, so execute it exactly once per backward pass.
D = list(reversed(D))

In [93]:
# Debug dump: the delta list, the first hidden layer's activations and the
# number of deltas, separated by marker lines.
print(D, "*****", A[1], "*****", len(D), sep="\n")

[array([[-0.00389272, -0.0040701 , -0.00851073],
       [ 0.02131438,  0.00949044,  0.03921318],
       [ 0.02048489,  0.0182263 ,  0.0408401 ],
       [-0.00410735, -0.00136997, -0.00734821]]), array([[ 0.02413957],
       [-0.10885037],
       [-0.11738599],
       [ 0.02052806]])]
*****
[[0.72199623 0.5717353  0.41975545]
 [0.57919827 0.85126057 0.46638336]
 [0.68108079 0.65657855 0.40175595]
 [0.53092073 0.89125952 0.44792792]]
*****
2


In [83]:
# Gradient-descent step: for every layer, the gradient is the layer's input
# activations (transposed) times that layer's delta; move the weights against
# it, scaled by the learning rate. Each execution applies one more step.
for l in range(len(W)):
    W[l] -= alpha * A[l].T.dot(D[l])

In [84]:
# Display the weight matrices after the update step.
# NOTE(review): this echoes every full array; showing the shapes or a small
# slice would keep the notebook output compact.
W

[array([[-0.39242945, -0.08931696, -0.09244746, ...,  0.1097112 ,
         -0.0376283 ,  0.14222241],
        [ 0.07812406,  0.17596275, -0.28147289, ...,  0.01367364,
         -0.18064734,  0.07370911],
        [-0.00661014, -0.0481631 , -0.1191703 , ..., -0.02848184,
          0.0775578 ,  0.02914014],
        ...,
        [ 0.02645819, -0.12841049,  0.0006873 , ...,  0.02745985,
         -0.00922214,  0.11037074],
        [ 0.01031723,  0.00583354, -0.19989255, ..., -0.17854902,
          0.02411363,  0.03506759],
        [-0.15585138, -0.07710791, -0.20350624, ..., -0.02334104,
          0.11917263, -0.13783023]]),
 array([[ 1.20497231e-01,  4.72945925e-01,  8.70494012e-02,
         -3.55567034e-01, -1.30139123e-01,  1.86924305e-01,
         -1.00666220e-01, -7.08170551e-02, -1.12465419e-01,
         -3.22828423e-01,  1.24847127e-01, -4.96147900e-01,
         -1.55185808e-01, -9.19130515e-02, -4.95593241e-02,
         -2.36275558e-01,  6.92337949e-02],
        [ 2.35990590e-03, -1.