In [13]:
import numpy as np

np.random.seed(12345)


def initialize(input_dim, hidden_dim, output_dim, batch_size):
    W1 = np.random.randn(hidden_dim, input_dim) * 0.01
    b1 = np.zeros((hidden_dim,))
    W2 = np.random.randn(output_dim, hidden_dim) * 0.01
    b2 = np.zeros((output_dim,))

    parameters = [W1, b1, W2, b2]
    x = np.random.rand(input_dim, batch_size)
    y = np.random.randn(output_dim, batch_size)

    return parameters, x, y


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def deriv_sigmoid(x):
    return x * (1 - x)

In [14]:
def forward(parameters, X):
    W1, b1, W2, b2 = parameters

    batch_size = X.shape[1]
    hidden_dim = W1.shape[0]
    output_dim = W2.shape[0]

    hid = np.zeros((hidden_dim, batch_size))
    outputs = np.zeros((output_dim, batch_size))

    hid = sigmoid(np.dot(W1, X) + b1.reshape(-1,1))
    outputs = np.dot(W2, hid) + b2.reshape(-1,1)

    activations = [X, hid, outputs]

    return activations

In [15]:
def squared_loss(predictions, targets):
    """ Computes mean squared error

    predictions: (output_dim, batch_size)
    targets: (output_dim, batch_size)

    """

    loss = np.zeros(targets.shape[1])

    loss = (1./targets.shape[1]) * np.sum(np.sum(.5 * (predictions - targets)**2, axis=0))

    return np.mean(loss)

In [16]:
def deriv_squared_loss(predictions, targets):
    
    batch_size = targets.shape[1]
    dloss = np.zeros(targets.shape)

    dloss = (predictions - targets) / batch_size

    return dloss

In [17]:
def backward(activations, targets, parameters):

    X, hid, predictions = activations

    input_dim = X.shape[0]
    hidden_dim = hid.shape[0]
    output_dim = predictions.shape[0]

    W1, b1, W2, b2 = parameters

    dW1 = np.zeros((hidden_dim, input_dim))
    db1 = np.zeros((hidden_dim,))
    dW2 = np.zeros((output_dim, hidden_dim))
    db2 = np.zeros((output_dim,))

    out_error = squared_loss(predictions, targets)
    out_delta = deriv_squared_loss(predictions, targets)
    
    dhid_error = np.dot(W2.T, out_delta)
    dhid_delta = dhid_error * deriv_sigmoid(hid)
    
    dW1 = np.dot(dhid_delta, X.T)
    db1 = np.sum(dhid_delta, axis=1)
    
    dW2 = np.dot(out_delta, hid.T)
    db2 = np.sum(out_delta, axis=1)

    grads = [dW1, db1, dW2, db2]

    return grads


In [27]:
def taining(X_train, y_train, iteration = 1e5, learning_rate = 0.01):
    input_dim = X_train.shape[1]
    hidden_dim = 30
    output_dim = 10
    batch_size = 5
    
    parameters, X, Y = initialize(input_dim, hidden_dim, output_dim, batch_size)
    
    for i in xrange(0, iteration):
        

SyntaxError: unexpected EOF while parsing (<ipython-input-27-fa74bfe6e084>, line 11)

In [18]:
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.model_selection import train_test_split

from sklearn.datasets import fetch_mldata

from sklearn.preprocessing import StandardScaler

from sklearn.utils import check_random_state

 

mnist = fetch_mldata('MNIST original')

X = mnist.data.astype('float64')

y = mnist.target

random_state = check_random_state(0)

 

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=1000, test_size=300, random_state= random_state)

 

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [19]:
if __name__ == '__main__':
    input_dim = 784
    hidden_dim = 30
    output_dim = 10
    batch_size = 5
    iteration = 1e5
    
    parameters, X, Y = initialize(input_dim, hidden_dim, output_dim, batch_size)
    
    ##activations = [X, hid, outputs]
    activations = forward(parameters, X_train)

ValueError: shapes (30,784) and (1000,784) not aligned: 784 (dim 1) != 1000 (dim 0)

In [23]:
y_train.shape

(1000,)

In [12]:
W1, b1, W2, b2 = parameters

batch_size = X_train.shape[1]
hidden_dim = W1.shape[0]
output_dim = W2.shape[0]

hid = np.zeros((hidden_dim, batch_size))
outputs = np.zeros((output_dim, batch_size))
    
hid = sigmoid(np.dot(W1, X) + b1.reshape(-1,1))
outputs = np.dot(W2, hid) + b2.reshape(-1,1)
activations = [X, hid, outputs]

X_train.shape[1]

784

In [1]:
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.model_selection import train_test_split

from sklearn.datasets import fetch_mldata

from sklearn.preprocessing import StandardScaler

from sklearn.utils import check_random_state

 

mnist = fetch_mldata('MNIST original')

X = mnist.data.astype('float64')

y = mnist.target

random_state = check_random_state(0)

 

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=1000, test_size=300, random_state= random_state)

 

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

 

expected = y_test

predicted = y_test #Network output

 

print("Classification report:")

print(classification_report(expected, predicted))

print("Confusion matrix:")

print(confusion_matrix(expected, predicted))

Classification report:
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00        32
        1.0       1.00      1.00      1.00        34
        2.0       1.00      1.00      1.00        32
        3.0       1.00      1.00      1.00        31
        4.0       1.00      1.00      1.00        26
        5.0       1.00      1.00      1.00        24
        6.0       1.00      1.00      1.00        27
        7.0       1.00      1.00      1.00        31
        8.0       1.00      1.00      1.00        31
        9.0       1.00      1.00      1.00        32

avg / total       1.00      1.00      1.00       300

Confusion matrix:
[[32  0  0  0  0  0  0  0  0  0]
 [ 0 34  0  0  0  0  0  0  0  0]
 [ 0  0 32  0  0  0  0  0  0  0]
 [ 0  0  0 31  0  0  0  0  0  0]
 [ 0  0  0  0 26  0  0  0  0  0]
 [ 0  0  0  0  0 24  0  0  0  0]
 [ 0  0  0  0  0  0 27  0  0  0]
 [ 0  0  0  0  0  0  0 31  0  0]
 [ 0  0  0  0  0  0  0  0 31  0]
 [ 0  0  0  0  0  0  0 

In [31]:
y_test.shape

(300,)