In [4]:
import numpy as np
from  scipy import ndimage 

In [5]:
from matplotlib import pyplot as plt


In [6]:
from sklearn import manifold, datasets


In [7]:
from sklearn.model_selection import train_test_split

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
from sklearn.metrics import accuracy_score

In [11]:
from sklearn.datasets import fetch_mldata


In [12]:
from sklearn.grid_search import GridSearchCV

In [13]:
from sklearn.neural_network import MLPClassifier

In [14]:
# load data

In [15]:
mnistData = datasets.load_digits(n_class=10)

In [31]:
X, Y = mnistData.data, mnistData.target

In [24]:
Y = Y.astype(int)

In [32]:
type(X)

numpy.ndarray

In [35]:
X = X[::20, :]

In [33]:
Y = Y[::10]

In [38]:
X.shape

(90, 64)

In [39]:
Y.shape

(180,)

In [45]:
num_examples = X.shape[0]      ## training set size
nn_input_dim = X.shape[1]      ## input layer dimensionality
nn_output_dim = len(np.unique(Y)) 

In [43]:
nn_output_dim

10

In [44]:
prams = {
    "lr": 0.001,
    "max_iter": 500,
    "wight_init": "xavier",
    "h_dimn": 100
    
}

In [47]:
def xavier_initialization(params):
    hdim = params["h_dimn"]
    winit = params["weight_init"]
    if winit == "random":
        np.random.seed(0)
        W1 = np.random.rand(nn_input_dim, hdim)
        b1 = np.random.rand(1, hdim)
        W2 = np.random.rand(hdim, nn_output_dim)
        b2 = np.random.rand(1, nn_output_dim)
    elif winit == "zeros":
        W1 = np.zeros((nn_input_dim, hdim))
        b1 = np.zeros((1, hdim))
        W2 = np.zeros((hdim, nn_output_dim))
        b2 = np.zeros((1, nn_output_dim))
    elif winit == "xavier":
        W1 = xavier_init(nn_input_dim, hdim)
        b1 = xavier_init(1, hdim)
        W2 = xavier_init(hdim, nn_output_dim)
        b2 = xavier_init(1, nn_output_dim)
    elif winit == "uniform":
        W1 = np.random.uniform(size=(nn_input_dim, hdim), low=-1, high=1)/np.sqrt(nn_input_dim)
        b1 = np.random.uniform(size=(1, hdim), low=-1, high=1)
        W2 = np.random.uniform(size=(hdim, nn_output_dim), low=-1, high=1)/np.sqrt(hdim)
        b2 = np.random.uniform(size=(1, nn_output_dim), low=-1, high=1)
    elif winit == "normal":
        W1 = np.random.normal(loc = 0, scale = 0.5, size = (nn_input_dim, hdim))
        b1 = np.random.normal(loc = 0, scale = 0.5, size=(1, hdim))
        W2 = np.random.normal(loc = 0, scale = 0.5, size = (hdim, nn_output_dim))
        b2 = np.random.normal(loc = 0, scale = 0.5, size=(1, nn_output_dim))
    return W1, b1, W2, b2 

    

In [49]:
def softmax(x):
    exp_scores = np.exp(x)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

In [50]:
def build_model():
    W1, b1, W2, b2 = weight_initialization(params)
    # This is what we return at the end
    model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model

In [51]:
def feedforward(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2)+ b2
    probs = softmax(z2)
    return a1, probs

In [53]:
def calculate_loss(model, x , y):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    _, probs = feedforward(model, x)
    
    # calculare crosss entropy
    correct_logprobs = -np.log(probs(y.shape[0], y))
    data_loss = np.sum(correct_logprobs)
    
    return 1./y.shape[0] * data_loss

In [54]:
def test(model, x, y):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation to calculate predictions
    _, probs = feedforward(model, x)
    preds = np.argmax(probs, axis=1)
    return np.count_nonzero(y==preds)/y.shape[0]

    

In [55]:
def train(model, X_train, X_test, Y_train, Y_test, verbose=True):
    # Gradient descent. For each batch...
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    for i in range(0, params["max_iter"]):

        # Forward propagation
        a1, probs = feedforward(model, X_train)

        # Backpropagation
        dW2, db2, dW1, db1 = backpropagation(model, X_train, Y_train, a1, probs)

        # Gradient descent parameter update
        W1 += -params["lr"] * dW1
        b1 += -params["lr"] * db1
        W2 += -params["lr"] * dW2
        b2 += -params["lr"] * db2
        
        # Assign new parameters to the model
        model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        if verbose and i % 50 == 0:
            print("Loss after iteration %i: %f" %(i, calculate_loss(model, X_train, Y_train)),
                  ", Test accuracy:", test(model, X_test, Y_test), "\n")
    return model

In [56]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4)
t = ["xavier","uniform","normal","zeros","random"]

for i in range(5):
    params["weight_init"] = t[i]
    model = build_model()
    model = train(model, X_train, X_test, Y_train, Y_test, verbose=False)
    print(params, "TestAccuracy=", test(model,X_test, Y_test))
    

ValueError: Found input variables with inconsistent numbers of samples: [90, 180]